gpustack-runner 0.1.24.post3__tar.gz → 0.1.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/PKG-INFO +21 -21
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/README.md +20 -20
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.25/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/runner.py.json +132 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/cuda/Dockerfile +77 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/matrix.yaml +22 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/cuda/Dockerfile +17 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/matrix.yaml +56 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/rocm/Dockerfile +17 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/README.md +2 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/Dockerfile +15 -2
- gpustack_runner-0.1.25/pack/cann/patches/vllm_omni/001_wrong_patch.patch +13 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cuda/Dockerfile +25 -7
- gpustack_runner-0.1.25/pack/cuda/patches/vllm_omni/001_wrong_patch.patch +13 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/rocm/Dockerfile +119 -6
- gpustack_runner-0.1.25/pack/rocm/patches/vllm_omni/001_wrong_patch.patch +13 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +132 -0
- gpustack_runner-0.1.24.post3/gpustack_runner/_version_appendix.py +0 -1
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.codespelldict +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.codespellrc +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.gitattributes +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.gitignore +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.python-version +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/LICENSE +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/Makefile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/docs/index.md +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/__utils__.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/cmds/images.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/envs.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/runner.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/hatch.toml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/patches/vllm_001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/patches/vllm_001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/patches/mindie.zip +0 -0
- /gpustack_runner-0.1.24.post3/pack/cann/patches/vllm_001_wrong_dp_ray.patch → /gpustack_runner-0.1.25/pack/cann/patches/vllm/001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/corex/Dockerfile +0 -0
- /gpustack_runner-0.1.24.post3/pack/cuda/patches/vllm_001_wrong_dp_ray.patch → /gpustack_runner-0.1.25/pack/cuda/patches/vllm/001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/dtk/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/expand_matrix.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/hggc/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/maca/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/musa/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/prune_runner.sh +0 -0
- /gpustack_runner-0.1.24.post3/pack/rocm/patches/sglang_001_wrong_vram.patch → /gpustack_runner-0.1.25/pack/rocm/patches/sglang/001_wrong_vram.patch +0 -0
- /gpustack_runner-0.1.24.post3/pack/rocm/patches/vllm_001_wrong_dp_ray.patch → /gpustack_runner-0.1.25/pack/rocm/patches/vllm/001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/squash_expand_matrix.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/squash_image.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pyproject.toml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pytest.ini +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/ruff.toml +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_merge_image.json +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_replace_image_with.json +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_split_image.json +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_utils.py +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/activate +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/uv.lock +0 -0
- {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.24.post3
|
|
3
|
+
Version: 0.1.25
|
|
4
4
|
Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -52,17 +52,17 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
52
52
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
53
53
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
54
54
|
|
|
55
|
-
| CANN Version <br/> (Variant) | MindIE | vLLM
|
|
56
|
-
|
|
57
|
-
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
58
|
-
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
59
|
-
| 8.5 (310P) | `2.3.0` | `0.14.1`
|
|
60
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
61
|
-
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
62
|
-
| 8.3 (310P) | `2.2.rc1` |
|
|
63
|
-
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2
|
|
64
|
-
| 8.2 (910B) | `2.1.rc2` | `0.10.2`,
|
|
65
|
-
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2`
|
|
55
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
56
|
+
|------------------------------|-----------|-----------------------------------|------------------------|
|
|
57
|
+
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
58
|
+
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
59
|
+
| 8.5 (310P) | `2.3.0` | `0.14.1` | |
|
|
60
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
61
|
+
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
62
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
63
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
|
|
64
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
|
|
65
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
66
66
|
|
|
67
67
|
### Iluvatar CoreX
|
|
68
68
|
|
|
@@ -80,11 +80,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
80
80
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
81
81
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
82
82
|
|
|
83
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
84
|
-
|
|
85
|
-
| 12.9 | `0.14.1`,
|
|
86
|
-
| 12.8 | `0.
|
|
87
|
-
| 12.6 | `0.
|
|
83
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
84
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
|
|
85
|
+
| 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
|
|
86
|
+
| 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
|
|
87
|
+
| 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
|
|
88
88
|
|
|
89
89
|
### Hygon DTK
|
|
90
90
|
|
|
@@ -128,10 +128,10 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
128
128
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
129
129
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
130
130
|
|
|
131
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
132
|
-
|
|
133
|
-
| 7.0 |
|
|
134
|
-
| 6.4 |
|
|
131
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
132
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
|
|
133
|
+
| 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
|
|
134
|
+
| 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
|
|
135
135
|
|
|
136
136
|
## Directory Structure
|
|
137
137
|
|
|
@@ -32,17 +32,17 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
32
32
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
33
33
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
34
34
|
|
|
35
|
-
| CANN Version <br/> (Variant) | MindIE | vLLM
|
|
36
|
-
|
|
37
|
-
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
38
|
-
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
39
|
-
| 8.5 (310P) | `2.3.0` | `0.14.1`
|
|
40
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
41
|
-
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
42
|
-
| 8.3 (310P) | `2.2.rc1` |
|
|
43
|
-
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2
|
|
44
|
-
| 8.2 (910B) | `2.1.rc2` | `0.10.2`,
|
|
45
|
-
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2`
|
|
35
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
36
|
+
|------------------------------|-----------|-----------------------------------|------------------------|
|
|
37
|
+
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
38
|
+
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
39
|
+
| 8.5 (310P) | `2.3.0` | `0.14.1` | |
|
|
40
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
41
|
+
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
42
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
43
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
|
|
44
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
|
|
45
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
46
46
|
|
|
47
47
|
### Iluvatar CoreX
|
|
48
48
|
|
|
@@ -60,11 +60,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
60
60
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
61
61
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
62
62
|
|
|
63
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
64
|
-
|
|
65
|
-
| 12.9 | `0.14.1`,
|
|
66
|
-
| 12.8 | `0.
|
|
67
|
-
| 12.6 | `0.
|
|
63
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
64
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
|
|
65
|
+
| 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
|
|
66
|
+
| 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
|
|
67
|
+
| 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
|
|
68
68
|
|
|
69
69
|
### Hygon DTK
|
|
70
70
|
|
|
@@ -108,10 +108,10 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
108
108
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
109
109
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
110
110
|
|
|
111
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
112
|
-
|
|
113
|
-
| 7.0 |
|
|
114
|
-
| 6.4 |
|
|
111
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
112
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
|
|
113
|
+
| 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
|
|
114
|
+
| 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
|
|
115
115
|
|
|
116
116
|
## Directory Structure
|
|
117
117
|
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.24.post3'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 24, 'post3')
|
|
30
|
+
__version__ = version = '0.1.25'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 25)
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "b005327"
|
|
@@ -868,6 +868,28 @@
|
|
|
868
868
|
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.6.post2",
|
|
869
869
|
"deprecated": false
|
|
870
870
|
},
|
|
871
|
+
{
|
|
872
|
+
"backend": "cuda",
|
|
873
|
+
"backend_version": "12.9",
|
|
874
|
+
"original_backend_version": "12.9.1",
|
|
875
|
+
"backend_variant": "",
|
|
876
|
+
"service": "vllm",
|
|
877
|
+
"service_version": "0.15.0",
|
|
878
|
+
"platform": "linux/amd64",
|
|
879
|
+
"docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
|
|
880
|
+
"deprecated": false
|
|
881
|
+
},
|
|
882
|
+
{
|
|
883
|
+
"backend": "cuda",
|
|
884
|
+
"backend_version": "12.9",
|
|
885
|
+
"original_backend_version": "12.9.1",
|
|
886
|
+
"backend_variant": "",
|
|
887
|
+
"service": "vllm",
|
|
888
|
+
"service_version": "0.15.0",
|
|
889
|
+
"platform": "linux/arm64",
|
|
890
|
+
"docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
|
|
891
|
+
"deprecated": false
|
|
892
|
+
},
|
|
871
893
|
{
|
|
872
894
|
"backend": "cuda",
|
|
873
895
|
"backend_version": "12.9",
|
|
@@ -1077,6 +1099,28 @@
|
|
|
1077
1099
|
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
|
|
1078
1100
|
"deprecated": true
|
|
1079
1101
|
},
|
|
1102
|
+
{
|
|
1103
|
+
"backend": "cuda",
|
|
1104
|
+
"backend_version": "12.8",
|
|
1105
|
+
"original_backend_version": "12.8.1",
|
|
1106
|
+
"backend_variant": "",
|
|
1107
|
+
"service": "vllm",
|
|
1108
|
+
"service_version": "0.15.0",
|
|
1109
|
+
"platform": "linux/amd64",
|
|
1110
|
+
"docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
|
|
1111
|
+
"deprecated": false
|
|
1112
|
+
},
|
|
1113
|
+
{
|
|
1114
|
+
"backend": "cuda",
|
|
1115
|
+
"backend_version": "12.8",
|
|
1116
|
+
"original_backend_version": "12.8.1",
|
|
1117
|
+
"backend_variant": "",
|
|
1118
|
+
"service": "vllm",
|
|
1119
|
+
"service_version": "0.15.0",
|
|
1120
|
+
"platform": "linux/arm64",
|
|
1121
|
+
"docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
|
|
1122
|
+
"deprecated": false
|
|
1123
|
+
},
|
|
1080
1124
|
{
|
|
1081
1125
|
"backend": "cuda",
|
|
1082
1126
|
"backend_version": "12.8",
|
|
@@ -1297,6 +1341,28 @@
|
|
|
1297
1341
|
"docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
|
|
1298
1342
|
"deprecated": true
|
|
1299
1343
|
},
|
|
1344
|
+
{
|
|
1345
|
+
"backend": "cuda",
|
|
1346
|
+
"backend_version": "12.6",
|
|
1347
|
+
"original_backend_version": "12.6.3",
|
|
1348
|
+
"backend_variant": "",
|
|
1349
|
+
"service": "vllm",
|
|
1350
|
+
"service_version": "0.15.0",
|
|
1351
|
+
"platform": "linux/amd64",
|
|
1352
|
+
"docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
|
|
1353
|
+
"deprecated": false
|
|
1354
|
+
},
|
|
1355
|
+
{
|
|
1356
|
+
"backend": "cuda",
|
|
1357
|
+
"backend_version": "12.6",
|
|
1358
|
+
"original_backend_version": "12.6.3",
|
|
1359
|
+
"backend_variant": "",
|
|
1360
|
+
"service": "vllm",
|
|
1361
|
+
"service_version": "0.15.0",
|
|
1362
|
+
"platform": "linux/arm64",
|
|
1363
|
+
"docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
|
|
1364
|
+
"deprecated": false
|
|
1365
|
+
},
|
|
1300
1366
|
{
|
|
1301
1367
|
"backend": "cuda",
|
|
1302
1368
|
"backend_version": "12.6",
|
|
@@ -1748,6 +1814,17 @@
|
|
|
1748
1814
|
"docker_image": "gpustack/runner:musa4.1-vllm0.9.2",
|
|
1749
1815
|
"deprecated": false
|
|
1750
1816
|
},
|
|
1817
|
+
{
|
|
1818
|
+
"backend": "rocm",
|
|
1819
|
+
"backend_version": "7.0",
|
|
1820
|
+
"original_backend_version": "7.0.2",
|
|
1821
|
+
"backend_variant": "",
|
|
1822
|
+
"service": "sglang",
|
|
1823
|
+
"service_version": "0.5.8",
|
|
1824
|
+
"platform": "linux/amd64",
|
|
1825
|
+
"docker_image": "gpustack/runner:rocm7.0-sglang0.5.8",
|
|
1826
|
+
"deprecated": false
|
|
1827
|
+
},
|
|
1751
1828
|
{
|
|
1752
1829
|
"backend": "rocm",
|
|
1753
1830
|
"backend_version": "7.0",
|
|
@@ -1770,6 +1847,28 @@
|
|
|
1770
1847
|
"docker_image": "gpustack/runner:rocm7.0-sglang0.5.6.post2",
|
|
1771
1848
|
"deprecated": false
|
|
1772
1849
|
},
|
|
1850
|
+
{
|
|
1851
|
+
"backend": "rocm",
|
|
1852
|
+
"backend_version": "7.0",
|
|
1853
|
+
"original_backend_version": "7.0.2",
|
|
1854
|
+
"backend_variant": "",
|
|
1855
|
+
"service": "vllm",
|
|
1856
|
+
"service_version": "0.15.0",
|
|
1857
|
+
"platform": "linux/amd64",
|
|
1858
|
+
"docker_image": "gpustack/runner:rocm7.0-vllm0.15.0",
|
|
1859
|
+
"deprecated": false
|
|
1860
|
+
},
|
|
1861
|
+
{
|
|
1862
|
+
"backend": "rocm",
|
|
1863
|
+
"backend_version": "7.0",
|
|
1864
|
+
"original_backend_version": "7.0.2",
|
|
1865
|
+
"backend_variant": "",
|
|
1866
|
+
"service": "vllm",
|
|
1867
|
+
"service_version": "0.14.1",
|
|
1868
|
+
"platform": "linux/amd64",
|
|
1869
|
+
"docker_image": "gpustack/runner:rocm7.0-vllm0.14.1",
|
|
1870
|
+
"deprecated": false
|
|
1871
|
+
},
|
|
1773
1872
|
{
|
|
1774
1873
|
"backend": "rocm",
|
|
1775
1874
|
"backend_version": "7.0",
|
|
@@ -1814,6 +1913,17 @@
|
|
|
1814
1913
|
"docker_image": "gpustack/runner:rocm7.0-vllm0.11.0",
|
|
1815
1914
|
"deprecated": true
|
|
1816
1915
|
},
|
|
1916
|
+
{
|
|
1917
|
+
"backend": "rocm",
|
|
1918
|
+
"backend_version": "6.4",
|
|
1919
|
+
"original_backend_version": "6.4.4",
|
|
1920
|
+
"backend_variant": "",
|
|
1921
|
+
"service": "sglang",
|
|
1922
|
+
"service_version": "0.5.8",
|
|
1923
|
+
"platform": "linux/amd64",
|
|
1924
|
+
"docker_image": "gpustack/runner:rocm6.4-sglang0.5.8",
|
|
1925
|
+
"deprecated": false
|
|
1926
|
+
},
|
|
1817
1927
|
{
|
|
1818
1928
|
"backend": "rocm",
|
|
1819
1929
|
"backend_version": "6.4",
|
|
@@ -1847,6 +1957,28 @@
|
|
|
1847
1957
|
"docker_image": "gpustack/runner:rocm6.4-sglang0.5.5.post3",
|
|
1848
1958
|
"deprecated": false
|
|
1849
1959
|
},
|
|
1960
|
+
{
|
|
1961
|
+
"backend": "rocm",
|
|
1962
|
+
"backend_version": "6.4",
|
|
1963
|
+
"original_backend_version": "6.4.4",
|
|
1964
|
+
"backend_variant": "",
|
|
1965
|
+
"service": "vllm",
|
|
1966
|
+
"service_version": "0.15.0",
|
|
1967
|
+
"platform": "linux/amd64",
|
|
1968
|
+
"docker_image": "gpustack/runner:rocm6.4-vllm0.15.0",
|
|
1969
|
+
"deprecated": false
|
|
1970
|
+
},
|
|
1971
|
+
{
|
|
1972
|
+
"backend": "rocm",
|
|
1973
|
+
"backend_version": "6.4",
|
|
1974
|
+
"original_backend_version": "6.4.4",
|
|
1975
|
+
"backend_variant": "",
|
|
1976
|
+
"service": "vllm",
|
|
1977
|
+
"service_version": "0.14.1",
|
|
1978
|
+
"platform": "linux/amd64",
|
|
1979
|
+
"docker_image": "gpustack/runner:rocm6.4-vllm0.14.1",
|
|
1980
|
+
"deprecated": false
|
|
1981
|
+
},
|
|
1850
1982
|
{
|
|
1851
1983
|
"backend": "rocm",
|
|
1852
1984
|
"backend_version": "6.4",
|
|
@@ -0,0 +1,77 @@
|
|
|
# Post operation: upgrade the CuDNN and NCCL Python packages inside already
# released CUDA runner images. There is one stage per service; each stage starts
# from the released image of that service and only layers the upgraded wheels
# on top of it.

ARG CMAKE_MAX_JOBS
ARG CUDA_VERSION=12.8
ARG VLLM_VERSION=0.14.1
ARG SGLANG_VERSION=0.5.8

FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

## Update CuDNN and NCCL packages

RUN <<EOF
    # Update CuDNN and NCCL packages

    # The wheel names carry the CUDA major version (nvidia-cudnn-cu<major>),
    # which is taken from the first dot-separated field of VLLM_TORCH_CUDA_VERSION.
    # NOTE(review): presumably exported as ENV by the base image - confirm.
    IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"

    # Fail fast: without "-u" an unset variable would silently produce the
    # invalid requirement "nvidia-cudnn-cu>=..." and only fail inside uv.
    if [[ -z "${CUDA_MAJOR}" ]]; then
        echo "VLLM_TORCH_CUDA_VERSION is unset or empty, cannot resolve the CUDA major version" >&2
        exit 1
    fi

    # Install
    cat <<EOT >/tmp/requirements.txt
nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
nvidia-cudnn-frontend>=1.17.0
nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
EOT
    uv pip install \
        -r /tmp/requirements.txt

    # Review
    uv pip tree

    # Cleanup
    rm -rf /var/tmp/* \
        && rm -rf /tmp/*
EOF

## Entrypoint

WORKDIR /
ENTRYPOINT [ "tini", "--" ]


FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

## Update CuDNN and NCCL packages

RUN <<EOF
    # Update CuDNN and NCCL packages

    # Same derivation as in the vllm stage.
    # NOTE(review): this stage also reads the vLLM-named variable; verify that
    # the released SGLang image really exports VLLM_TORCH_CUDA_VERSION.
    IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"

    # Fail fast instead of writing "nvidia-cudnn-cu>=..." into the requirements.
    if [[ -z "${CUDA_MAJOR}" ]]; then
        echo "VLLM_TORCH_CUDA_VERSION is unset or empty, cannot resolve the CUDA major version" >&2
        exit 1
    fi

    # Install
    cat <<EOT >/tmp/requirements.txt
nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
nvidia-cudnn-frontend>=1.17.0
nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
EOT
    uv pip install \
        -r /tmp/requirements.txt

    # Review
    uv pip tree

    # Cleanup
    rm -rf /var/tmp/* \
        && rm -rf /tmp/*
EOF

## Entrypoint

WORKDIR /
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,22 @@
|
|
|
# Build matrix of the "cuda_several_patches" post operation.
# Each rule names a backend, the services to rebuild and the build args
# handed to the build (presumably consumed as Dockerfile ARGs - confirm).
rules:

  #
  # NVIDIA CUDA
  #

  ## Packed NVIDIA CUDA 12.9.
  ##
  - backend: "cuda"
    services:
      - "vllm"
    args:
      - "CUDA_VERSION=12.9"
      - "VLLM_VERSION=0.15.0"
  - backend: "cuda"
    services:
      - "vllm"
      - "sglang"
    args:
      - "CUDA_VERSION=12.9"
      - "VLLM_VERSION=0.14.1"
      - "SGLANG_VERSION=0.5.8"
|
gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/cuda/Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
# Post operation: re-tag released CUDA SGLang images with the
# SGLANG_DISABLE_CUDNN_CHECK environment variable set. Nothing is installed;
# the only change on top of the base image is the ENV below.

ARG CMAKE_MAX_JOBS
ARG CUDA_VERSION=12.8
ARG SGLANG_VERSION=0.5.8

FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

## Entrypoint

# Baked into the image so that every container started from it has it set.
ENV SGLANG_DISABLE_CUDNN_CHECK=1

WORKDIR /
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,56 @@
|
|
|
# Build matrix of the "sglang_disable_cudnn_check" post operation.
# Each rule names a backend, the services to rebuild, optionally the target
# platforms, and the build args handed to the build.
rules:

  #
  # NVIDIA CUDA
  #

  ## Packed NVIDIA CUDA 12.9.
  ##
  # NOTE(review): VLLM_VERSION is passed here although the cuda Dockerfile of
  # this operation declares no such ARG - confirm whether the tooling needs it.
  - backend: "cuda"
    services:
      - "sglang"
    args:
      - "CUDA_VERSION=12.9"
      - "VLLM_VERSION=0.14.1"
      - "SGLANG_VERSION=0.5.8"

  #
  # AMD ROCm
  #

  ## Packed ROCm 7.0.
  ##
  - backend: "rocm"
    services:
      - "sglang"
    platforms:
      - "linux/amd64"
    args:
      - "ROCM_VERSION=7.0"
      - "SGLANG_VERSION=0.5.8"
  - backend: "rocm"
    services:
      - "sglang"
    platforms:
      - "linux/amd64"
    args:
      - "ROCM_VERSION=7.0"
      - "SGLANG_VERSION=0.5.7"

  ## Packed ROCm 6.4.
  ##
  - backend: "rocm"
    services:
      - "sglang"
    platforms:
      - "linux/amd64"
    args:
      - "ROCM_VERSION=6.4"
      - "SGLANG_VERSION=0.5.8"
  - backend: "rocm"
    services:
      - "sglang"
    platforms:
      - "linux/amd64"
    args:
      - "ROCM_VERSION=6.4"
      - "SGLANG_VERSION=0.5.7"
|
gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/rocm/Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
# Post operation: re-tag released ROCm SGLang images with the
# SGLANG_DISABLE_CUDNN_CHECK environment variable set. Nothing is installed;
# the only change on top of the base image is the ENV below.

ARG CMAKE_MAX_JOBS
ARG ROCM_VERSION=7.0
ARG SGLANG_VERSION=0.5.8

FROM gpustack/runner:rocm${ROCM_VERSION}-sglang${SGLANG_VERSION} AS sglang
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

## Entrypoint

# Baked into the image so that every container started from it has it set.
ENV SGLANG_DISABLE_CUDNN_CHECK=1

WORKDIR /
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -35,3 +35,5 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
|
|
|
35
35
|
- [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.
|
|
36
36
|
- [x] 2026-01-29: Apply DP deployment patches to vLLM 0.13.0 for CUDA/ROCm released images.
|
|
37
37
|
- [x] 2026-01-29: Reinstall SGLang Kernel for SGLang 0.5.7 of CANN released images.
|
|
38
|
+
- [x] 2026-02-03: Apply several patches (upgrade the CuDNN and NCCL packages) to vLLM 0.14.1/0.15.0 and SGLang 0.5.8 for CUDA 12.9 released images.
|
|
39
|
+
- [x] 2026-02-03: Patch SGLang 0.5.8/0.5.7 of CUDA/ROCm released images to disable the CuDNN version check.
|
|
@@ -59,7 +59,7 @@ ARG VLLM_VERSION=0.14.1
|
|
|
59
59
|
ARG VLLM_ASCEND_VERSION=0.14.0rc1
|
|
60
60
|
ARG VLLM_TORCH_VERSION=2.9.0
|
|
61
61
|
ARG VLLM_MOONCAKE_VERSION=0.3.7.post2
|
|
62
|
-
ARG VLLM_OMNI_COMMIT=
|
|
62
|
+
ARG VLLM_OMNI_COMMIT=de2cac9
|
|
63
63
|
ARG SGLANG_BASE_IMAGE=gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-python${PYTHON_VERSION}
|
|
64
64
|
ARG SGLANG_VERSION=0.5.8
|
|
65
65
|
ARG SGLANG_TORCH_VERSION=2.8.0
|
|
@@ -865,6 +865,15 @@ RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
|
|
|
865
865
|
uv pip install --no-build-isolation \
|
|
866
866
|
/omni/workspace/*.whl
|
|
867
867
|
|
|
868
|
+
# Dependencies
|
|
869
|
+
uv pip uninstall onnxruntime || true
|
|
870
|
+
cat <<EOT >/tmp/requirements.txt
|
|
871
|
+
onnxruntime-cann
|
|
872
|
+
sox
|
|
873
|
+
EOT
|
|
874
|
+
uv pip install \
|
|
875
|
+
-r /tmp/requirements.txt
|
|
876
|
+
|
|
868
877
|
# Cleanup
|
|
869
878
|
rm -rf /var/tmp/* \
|
|
870
879
|
&& rm -rf /tmp/*
|
|
@@ -956,7 +965,11 @@ RUN --mount=type=bind,target=/workspace,rw <<EOF
|
|
|
956
965
|
|
|
957
966
|
tree -hs /workspace/patches
|
|
958
967
|
pushd $(pip show vllm | grep Location: | cut -d" " -f 2) \
|
|
959
|
-
&& patch -p1 < /workspace/patches/
|
|
968
|
+
&& patch -p1 < /workspace/patches/vllm/*.patch
|
|
969
|
+
if pip show vllm_omni > /dev/null 2>&1; then \
|
|
970
|
+
pushd $(pip show vllm_omni | grep Location: | cut -d" " -f 2) \
|
|
971
|
+
&& patch -p1 < /workspace/patches/vllm_omni/*.patch; \
|
|
972
|
+
fi
|
|
960
973
|
EOF
|
|
961
974
|
|
|
962
975
|
## Entrypoint
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
diff --git a/vllm_omni/patch.py b/vllm_omni/patch.py
|
|
2
|
+
index 687ff51..6b67924 100644
|
|
3
|
+
--- a/vllm_omni/patch.py
|
|
4
|
+
+++ b/vllm_omni/patch.py
|
|
5
|
+
@@ -19,6 +19,8 @@ for module_name, module in sys.modules.items():
|
|
6
|
+
# only do patch on module of vllm, pass others
|
|
7
|
+
if "vllm" not in module_name:
|
|
8
|
+
continue
|
|
9
|
+
+ if "--omni" not in sys.argv:
|
|
10
|
+
+ continue
|
|
11
|
+
if hasattr(module, "EngineCoreOutput") and module.EngineCoreOutput == _OriginalEngineCoreOutput:
|
|
12
|
+
module.EngineCoreOutput = OmniEngineCoreOutput
|
|
13
|
+
if hasattr(module, "EngineCoreOutputs") and module.EngineCoreOutputs == _OriginalEngineCoreOutputs:
|