gpustack-runner 0.1.24.post4__tar.gz → 0.1.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/PKG-INFO +21 -21
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/README.md +20 -20
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.25/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/runner.py.json +88 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/cuda/Dockerfile +77 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/matrix.yaml +22 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/cuda/Dockerfile +17 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/matrix.yaml +56 -0
- gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/rocm/Dockerfile +17 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/README.md +2 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/Dockerfile +1 -1
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cuda/Dockerfile +12 -6
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/Dockerfile +99 -4
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +88 -0
- gpustack_runner-0.1.24.post4/gpustack_runner/_version_appendix.py +0 -1
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.codespelldict +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.codespellrc +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.gitattributes +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.gitignore +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.python-version +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/LICENSE +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/Makefile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/docs/index.md +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/__utils__.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/cmds/images.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/envs.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/runner.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/hatch.toml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/patches/vllm_001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/patches/vllm_001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/patches/mindie.zip +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/patches/vllm/001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/patches/vllm_omni/001_wrong_patch.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/corex/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cuda/patches/vllm/001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cuda/patches/vllm_omni/001_wrong_patch.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/dtk/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/expand_matrix.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/hggc/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/maca/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/musa/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/prune_runner.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/patches/sglang/001_wrong_vram.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/patches/vllm/001_wrong_dp_ray.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/patches/vllm_omni/001_wrong_patch.patch +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/squash_expand_matrix.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/squash_image.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pyproject.toml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pytest.ini +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/ruff.toml +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_merge_image.json +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_replace_image_with.json +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_split_image.json +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_utils.py +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/activate +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/uv.lock +0 -0
- {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.24.post4
|
|
3
|
+
Version: 0.1.25
|
|
4
4
|
Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -52,17 +52,17 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
52
52
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
53
53
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
54
54
|
|
|
55
|
-
| CANN Version <br/> (Variant) | MindIE | vLLM
|
|
56
|
-
|
|
57
|
-
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
58
|
-
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
59
|
-
| 8.5 (310P) | `2.3.0` | `0.14.1`
|
|
60
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
61
|
-
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
62
|
-
| 8.3 (310P) | `2.2.rc1` |
|
|
63
|
-
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2
|
|
64
|
-
| 8.2 (910B) | `2.1.rc2` | `0.10.2`,
|
|
65
|
-
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2`
|
|
55
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
56
|
+
|------------------------------|-----------|-----------------------------------|------------------------|
|
|
57
|
+
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
58
|
+
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
59
|
+
| 8.5 (310P) | `2.3.0` | `0.14.1` | |
|
|
60
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
61
|
+
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
62
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
63
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
|
|
64
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
|
|
65
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
66
66
|
|
|
67
67
|
### Iluvatar CoreX
|
|
68
68
|
|
|
@@ -80,11 +80,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
80
80
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
81
81
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
82
82
|
|
|
83
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
84
|
-
|
|
85
|
-
| 12.9 | `0.14.1`,
|
|
86
|
-
| 12.8 | `0.
|
|
87
|
-
| 12.6 | `0.
|
|
83
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
84
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
|
|
85
|
+
| 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
|
|
86
|
+
| 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
|
|
87
|
+
| 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
|
|
88
88
|
|
|
89
89
|
### Hygon DTK
|
|
90
90
|
|
|
@@ -128,10 +128,10 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
128
128
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
129
129
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
130
130
|
|
|
131
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
132
|
-
|
|
133
|
-
| 7.0 | `0.14.1`,
|
|
134
|
-
| 6.4 | `0.14.1`,
|
|
131
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
132
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
|
|
133
|
+
| 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
|
|
134
|
+
| 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
|
|
135
135
|
|
|
136
136
|
## Directory Structure
|
|
137
137
|
|
|
@@ -32,17 +32,17 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
32
32
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
33
33
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
34
34
|
|
|
35
|
-
| CANN Version <br/> (Variant) | MindIE | vLLM
|
|
36
|
-
|
|
37
|
-
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
38
|
-
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
39
|
-
| 8.5 (310P) | `2.3.0` | `0.14.1`
|
|
40
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
41
|
-
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
42
|
-
| 8.3 (310P) | `2.2.rc1` |
|
|
43
|
-
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2
|
|
44
|
-
| 8.2 (910B) | `2.1.rc2` | `0.10.2`,
|
|
45
|
-
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2`
|
|
35
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
36
|
+
|------------------------------|-----------|-----------------------------------|------------------------|
|
|
37
|
+
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
38
|
+
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
39
|
+
| 8.5 (310P) | `2.3.0` | `0.14.1` | |
|
|
40
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
41
|
+
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
42
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
43
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
|
|
44
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
|
|
45
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
46
46
|
|
|
47
47
|
### Iluvatar CoreX
|
|
48
48
|
|
|
@@ -60,11 +60,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
60
60
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
61
61
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
62
62
|
|
|
63
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
64
|
-
|
|
65
|
-
| 12.9 | `0.14.1`,
|
|
66
|
-
| 12.8 | `0.
|
|
67
|
-
| 12.6 | `0.
|
|
63
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
64
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
|
|
65
|
+
| 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
|
|
66
|
+
| 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
|
|
67
|
+
| 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
|
|
68
68
|
|
|
69
69
|
### Hygon DTK
|
|
70
70
|
|
|
@@ -108,10 +108,10 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
108
108
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
109
109
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
110
110
|
|
|
111
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
112
|
-
|
|
113
|
-
| 7.0 | `0.14.1`,
|
|
114
|
-
| 6.4 | `0.14.1`,
|
|
111
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
112
|
+
|------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
|
|
113
|
+
| 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
|
|
114
|
+
| 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
|
|
115
115
|
|
|
116
116
|
## Directory Structure
|
|
117
117
|
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.24.post4'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 24, 'post4')
|
|
30
|
+
__version__ = version = '0.1.25'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 25)
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "b005327"
|
|
@@ -868,6 +868,28 @@
|
|
|
868
868
|
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.6.post2",
|
|
869
869
|
"deprecated": false
|
|
870
870
|
},
|
|
871
|
+
{
|
|
872
|
+
"backend": "cuda",
|
|
873
|
+
"backend_version": "12.9",
|
|
874
|
+
"original_backend_version": "12.9.1",
|
|
875
|
+
"backend_variant": "",
|
|
876
|
+
"service": "vllm",
|
|
877
|
+
"service_version": "0.15.0",
|
|
878
|
+
"platform": "linux/amd64",
|
|
879
|
+
"docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
|
|
880
|
+
"deprecated": false
|
|
881
|
+
},
|
|
882
|
+
{
|
|
883
|
+
"backend": "cuda",
|
|
884
|
+
"backend_version": "12.9",
|
|
885
|
+
"original_backend_version": "12.9.1",
|
|
886
|
+
"backend_variant": "",
|
|
887
|
+
"service": "vllm",
|
|
888
|
+
"service_version": "0.15.0",
|
|
889
|
+
"platform": "linux/arm64",
|
|
890
|
+
"docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
|
|
891
|
+
"deprecated": false
|
|
892
|
+
},
|
|
871
893
|
{
|
|
872
894
|
"backend": "cuda",
|
|
873
895
|
"backend_version": "12.9",
|
|
@@ -1077,6 +1099,28 @@
|
|
|
1077
1099
|
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
|
|
1078
1100
|
"deprecated": true
|
|
1079
1101
|
},
|
|
1102
|
+
{
|
|
1103
|
+
"backend": "cuda",
|
|
1104
|
+
"backend_version": "12.8",
|
|
1105
|
+
"original_backend_version": "12.8.1",
|
|
1106
|
+
"backend_variant": "",
|
|
1107
|
+
"service": "vllm",
|
|
1108
|
+
"service_version": "0.15.0",
|
|
1109
|
+
"platform": "linux/amd64",
|
|
1110
|
+
"docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
|
|
1111
|
+
"deprecated": false
|
|
1112
|
+
},
|
|
1113
|
+
{
|
|
1114
|
+
"backend": "cuda",
|
|
1115
|
+
"backend_version": "12.8",
|
|
1116
|
+
"original_backend_version": "12.8.1",
|
|
1117
|
+
"backend_variant": "",
|
|
1118
|
+
"service": "vllm",
|
|
1119
|
+
"service_version": "0.15.0",
|
|
1120
|
+
"platform": "linux/arm64",
|
|
1121
|
+
"docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
|
|
1122
|
+
"deprecated": false
|
|
1123
|
+
},
|
|
1080
1124
|
{
|
|
1081
1125
|
"backend": "cuda",
|
|
1082
1126
|
"backend_version": "12.8",
|
|
@@ -1297,6 +1341,28 @@
|
|
|
1297
1341
|
"docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
|
|
1298
1342
|
"deprecated": true
|
|
1299
1343
|
},
|
|
1344
|
+
{
|
|
1345
|
+
"backend": "cuda",
|
|
1346
|
+
"backend_version": "12.6",
|
|
1347
|
+
"original_backend_version": "12.6.3",
|
|
1348
|
+
"backend_variant": "",
|
|
1349
|
+
"service": "vllm",
|
|
1350
|
+
"service_version": "0.15.0",
|
|
1351
|
+
"platform": "linux/amd64",
|
|
1352
|
+
"docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
|
|
1353
|
+
"deprecated": false
|
|
1354
|
+
},
|
|
1355
|
+
{
|
|
1356
|
+
"backend": "cuda",
|
|
1357
|
+
"backend_version": "12.6",
|
|
1358
|
+
"original_backend_version": "12.6.3",
|
|
1359
|
+
"backend_variant": "",
|
|
1360
|
+
"service": "vllm",
|
|
1361
|
+
"service_version": "0.15.0",
|
|
1362
|
+
"platform": "linux/arm64",
|
|
1363
|
+
"docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
|
|
1364
|
+
"deprecated": false
|
|
1365
|
+
},
|
|
1300
1366
|
{
|
|
1301
1367
|
"backend": "cuda",
|
|
1302
1368
|
"backend_version": "12.6",
|
|
@@ -1781,6 +1847,17 @@
|
|
|
1781
1847
|
"docker_image": "gpustack/runner:rocm7.0-sglang0.5.6.post2",
|
|
1782
1848
|
"deprecated": false
|
|
1783
1849
|
},
|
|
1850
|
+
{
|
|
1851
|
+
"backend": "rocm",
|
|
1852
|
+
"backend_version": "7.0",
|
|
1853
|
+
"original_backend_version": "7.0.2",
|
|
1854
|
+
"backend_variant": "",
|
|
1855
|
+
"service": "vllm",
|
|
1856
|
+
"service_version": "0.15.0",
|
|
1857
|
+
"platform": "linux/amd64",
|
|
1858
|
+
"docker_image": "gpustack/runner:rocm7.0-vllm0.15.0",
|
|
1859
|
+
"deprecated": false
|
|
1860
|
+
},
|
|
1784
1861
|
{
|
|
1785
1862
|
"backend": "rocm",
|
|
1786
1863
|
"backend_version": "7.0",
|
|
@@ -1880,6 +1957,17 @@
|
|
|
1880
1957
|
"docker_image": "gpustack/runner:rocm6.4-sglang0.5.5.post3",
|
|
1881
1958
|
"deprecated": false
|
|
1882
1959
|
},
|
|
1960
|
+
{
|
|
1961
|
+
"backend": "rocm",
|
|
1962
|
+
"backend_version": "6.4",
|
|
1963
|
+
"original_backend_version": "6.4.4",
|
|
1964
|
+
"backend_variant": "",
|
|
1965
|
+
"service": "vllm",
|
|
1966
|
+
"service_version": "0.15.0",
|
|
1967
|
+
"platform": "linux/amd64",
|
|
1968
|
+
"docker_image": "gpustack/runner:rocm6.4-vllm0.15.0",
|
|
1969
|
+
"deprecated": false
|
|
1970
|
+
},
|
|
1883
1971
|
{
|
|
1884
1972
|
"backend": "rocm",
|
|
1885
1973
|
"backend_version": "6.4",
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG CUDA_VERSION=12.8
|
|
3
|
+
ARG VLLM_VERSION=0.14.1
|
|
4
|
+
ARG SGLANG_VERSION=0.5.8
|
|
5
|
+
|
|
6
|
+
FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
|
|
7
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
8
|
+
|
|
9
|
+
ARG TARGETPLATFORM
|
|
10
|
+
ARG TARGETOS
|
|
11
|
+
ARG TARGETARCH
|
|
12
|
+
|
|
13
|
+
## Update CuDNN and NCCL packages
|
|
14
|
+
|
|
15
|
+
RUN <<EOF
|
|
16
|
+
# Update CuDNN and NCCL packages
|
|
17
|
+
|
|
18
|
+
IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
|
|
19
|
+
|
|
20
|
+
# Install
|
|
21
|
+
cat <<EOT >/tmp/requirements.txt
|
|
22
|
+
nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
|
|
23
|
+
nvidia-cudnn-frontend>=1.17.0
|
|
24
|
+
nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
|
|
25
|
+
EOT
|
|
26
|
+
uv pip install \
|
|
27
|
+
-r /tmp/requirements.txt
|
|
28
|
+
|
|
29
|
+
# Review
|
|
30
|
+
uv pip tree
|
|
31
|
+
|
|
32
|
+
# Cleanup
|
|
33
|
+
rm -rf /var/tmp/* \
|
|
34
|
+
&& rm -rf /tmp/*
|
|
35
|
+
EOF
|
|
36
|
+
|
|
37
|
+
## Entrypoint
|
|
38
|
+
|
|
39
|
+
WORKDIR /
|
|
40
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
|
|
44
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
45
|
+
|
|
46
|
+
ARG TARGETPLATFORM
|
|
47
|
+
ARG TARGETOS
|
|
48
|
+
ARG TARGETARCH
|
|
49
|
+
|
|
50
|
+
## Update CuDNN and NCCL packages
|
|
51
|
+
|
|
52
|
+
RUN <<EOF
|
|
53
|
+
# Update CuDNN and NCCL packages
|
|
54
|
+
|
|
55
|
+
IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
|
|
56
|
+
|
|
57
|
+
# Install
|
|
58
|
+
cat <<EOT >/tmp/requirements.txt
|
|
59
|
+
nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
|
|
60
|
+
nvidia-cudnn-frontend>=1.17.0
|
|
61
|
+
nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
|
|
62
|
+
EOT
|
|
63
|
+
uv pip install \
|
|
64
|
+
-r /tmp/requirements.txt
|
|
65
|
+
|
|
66
|
+
# Review
|
|
67
|
+
uv pip tree
|
|
68
|
+
|
|
69
|
+
# Cleanup
|
|
70
|
+
rm -rf /var/tmp/* \
|
|
71
|
+
&& rm -rf /tmp/*
|
|
72
|
+
EOF
|
|
73
|
+
|
|
74
|
+
## Entrypoint
|
|
75
|
+
|
|
76
|
+
WORKDIR /
|
|
77
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
rules:
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# NVIDIA CUDA
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
## Packed NVIDIA CUDA 12.9.
|
|
8
|
+
##
|
|
9
|
+
- backend: "cuda"
|
|
10
|
+
services:
|
|
11
|
+
- "vllm"
|
|
12
|
+
args:
|
|
13
|
+
- "CUDA_VERSION=12.9"
|
|
14
|
+
- "VLLM_VERSION=0.15.0"
|
|
15
|
+
- backend: "cuda"
|
|
16
|
+
services:
|
|
17
|
+
- "vllm"
|
|
18
|
+
- "sglang"
|
|
19
|
+
args:
|
|
20
|
+
- "CUDA_VERSION=12.9"
|
|
21
|
+
- "VLLM_VERSION=0.14.1"
|
|
22
|
+
- "SGLANG_VERSION=0.5.8"
|
gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/cuda/Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG CUDA_VERSION=12.8
|
|
3
|
+
ARG SGLANG_VERSION=0.5.8
|
|
4
|
+
|
|
5
|
+
FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
|
|
6
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
7
|
+
|
|
8
|
+
ARG TARGETPLATFORM
|
|
9
|
+
ARG TARGETOS
|
|
10
|
+
ARG TARGETARCH
|
|
11
|
+
|
|
12
|
+
## Entrypoint
|
|
13
|
+
|
|
14
|
+
ENV SGLANG_DISABLE_CUDNN_CHECK=1
|
|
15
|
+
|
|
16
|
+
WORKDIR /
|
|
17
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
rules:
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# NVIDIA CUDA
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
## Packed NVIDIA CUDA 12.9.
|
|
8
|
+
##
|
|
9
|
+
- backend: "cuda"
|
|
10
|
+
services:
|
|
11
|
+
- "sglang"
|
|
12
|
+
args:
|
|
13
|
+
- "CUDA_VERSION=12.9"
|
|
14
|
+
- "VLLM_VERSION=0.14.1"
|
|
15
|
+
- "SGLANG_VERSION=0.5.8"
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# AMD ROCm
|
|
19
|
+
#
|
|
20
|
+
|
|
21
|
+
## Packed ROCm 7.0.
|
|
22
|
+
##
|
|
23
|
+
- backend: "rocm"
|
|
24
|
+
services:
|
|
25
|
+
- "sglang"
|
|
26
|
+
platforms:
|
|
27
|
+
- "linux/amd64"
|
|
28
|
+
args:
|
|
29
|
+
- "ROCM_VERSION=7.0"
|
|
30
|
+
- "SGLANG_VERSION=0.5.8"
|
|
31
|
+
- backend: "rocm"
|
|
32
|
+
services:
|
|
33
|
+
- "sglang"
|
|
34
|
+
platforms:
|
|
35
|
+
- "linux/amd64"
|
|
36
|
+
args:
|
|
37
|
+
- "ROCM_VERSION=7.0"
|
|
38
|
+
- "SGLANG_VERSION=0.5.7"
|
|
39
|
+
## Packed ROCm 6.4.
|
|
40
|
+
##
|
|
41
|
+
- backend: "rocm"
|
|
42
|
+
services:
|
|
43
|
+
- "sglang"
|
|
44
|
+
platforms:
|
|
45
|
+
- "linux/amd64"
|
|
46
|
+
args:
|
|
47
|
+
- "ROCM_VERSION=6.4"
|
|
48
|
+
- "SGLANG_VERSION=0.5.8"
|
|
49
|
+
- backend: "rocm"
|
|
50
|
+
services:
|
|
51
|
+
- "sglang"
|
|
52
|
+
platforms:
|
|
53
|
+
- "linux/amd64"
|
|
54
|
+
args:
|
|
55
|
+
- "ROCM_VERSION=6.4"
|
|
56
|
+
- "SGLANG_VERSION=0.5.7"
|
gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/rocm/Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG ROCM_VERSION=7.0
|
|
3
|
+
ARG SGLANG_VERSION=0.5.8
|
|
4
|
+
|
|
5
|
+
FROM gpustack/runner:rocm${ROCM_VERSION}-sglang${SGLANG_VERSION} AS sglang
|
|
6
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
7
|
+
|
|
8
|
+
ARG TARGETPLATFORM
|
|
9
|
+
ARG TARGETOS
|
|
10
|
+
ARG TARGETARCH
|
|
11
|
+
|
|
12
|
+
## Entrypoint
|
|
13
|
+
|
|
14
|
+
ENV SGLANG_DISABLE_CUDNN_CHECK=1
|
|
15
|
+
|
|
16
|
+
WORKDIR /
|
|
17
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -35,3 +35,5 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
|
|
|
35
35
|
- [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.
|
|
36
36
|
- [x] 2026-01-29: Apply DP deployment patches to vLLM 0.13.0 for CUDA/ROCm released images.
|
|
37
37
|
- [x] 2026-01-29: Reinstall SGLang Kernel for SGLang 0.5.7 of CANN released images.
|
|
38
|
+
- [x] 2026-02-03: Apply several patches to vLLM 0.14.1/0.15.0 and SGLang 0.5.8 for CUDA 12.9 released images.
|
|
39
|
+
- [x] 2026-02-03: Patch SGLang 0.5.8/0.5.7 of CUDA/ROCm released images to disable CuDNN version check.
|
|
@@ -59,7 +59,7 @@ ARG VLLM_VERSION=0.14.1
|
|
|
59
59
|
ARG VLLM_ASCEND_VERSION=0.14.0rc1
|
|
60
60
|
ARG VLLM_TORCH_VERSION=2.9.0
|
|
61
61
|
ARG VLLM_MOONCAKE_VERSION=0.3.7.post2
|
|
62
|
-
ARG VLLM_OMNI_COMMIT=
|
|
62
|
+
ARG VLLM_OMNI_COMMIT=de2cac9
|
|
63
63
|
ARG SGLANG_BASE_IMAGE=gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-python${PYTHON_VERSION}
|
|
64
64
|
ARG SGLANG_VERSION=0.5.8
|
|
65
65
|
ARG SGLANG_TORCH_VERSION=2.8.0
|
|
@@ -101,7 +101,7 @@ ARG VOXBOX_VERSION=0.0.21
|
|
|
101
101
|
ARG VOXBOX_TORCH_VERSION=2.7.1
|
|
102
102
|
ARG VOXBOX_TORCH_CUDA_VERSION=${CUDA_VERSION}
|
|
103
103
|
ARG VLLM_BASE_IMAGE=gpustack/runner:cuda${CUDA_VERSION}-python${PYTHON_VERSION}
|
|
104
|
-
ARG VLLM_VERSION=0.
|
|
104
|
+
ARG VLLM_VERSION=0.15.0
|
|
105
105
|
ARG VLLM_TORCH_VERSION=2.9.1
|
|
106
106
|
ARG VLLM_TORCH_CUDA_VERSION=${CUDA_VERSION}
|
|
107
107
|
ARG VLLM_BUILD_BASE_IMAGE=gpustack/runner:cuda${VLLM_TORCH_CUDA_VERSION}-python${PYTHON_VERSION}
|
|
@@ -111,12 +111,12 @@ ARG VLLM_NVIDIA_NVSHMEM_VERSION=3.4.5
|
|
|
111
111
|
ARG VLLM_AWS_EFA_VERSION=1.44.0
|
|
112
112
|
ARG VLLM_PPLX_KERNEL_COMMIT=12cecfda
|
|
113
113
|
ARG VLLM_DEEPEP_COMMIT=b57e5e21
|
|
114
|
-
ARG VLLM_DEEPGEMM_COMMIT=
|
|
114
|
+
ARG VLLM_DEEPGEMM_COMMIT=0f5f266
|
|
115
115
|
ARG VLLM_FLASHINFER_VERSION=0.6.1
|
|
116
116
|
ARG VLLM_FLASHATTENTION_VERSION=2.8.3
|
|
117
117
|
ARG VLLM_LMCACHE_VERSION=0.3.12
|
|
118
118
|
ARG VLLM_MOONCAKE_VERSION=0.3.8.post1
|
|
119
|
-
ARG VLLM_OMNI_COMMIT=
|
|
119
|
+
ARG VLLM_OMNI_COMMIT=d6f93b0
|
|
120
120
|
ARG SGLANG_BASE_IMAGE=vllm
|
|
121
121
|
ARG SGLANG_VERSION=0.5.8
|
|
122
122
|
ARG SGLANG_BUILD_BASE_IMAGE=vllm-build
|
|
@@ -844,7 +844,8 @@ RUN <<EOF
|
|
|
844
844
|
git -C /tmp clone --recursive --shallow-submodules \
|
|
845
845
|
https://github.com/deepseek-ai/DeepEP.git deep_ep \
|
|
846
846
|
&& pushd /tmp/deep_ep \
|
|
847
|
-
&& git checkout ${VLLM_DEEPEP_COMMIT}
|
|
847
|
+
&& git checkout ${VLLM_DEEPEP_COMMIT} \
|
|
848
|
+
&& git submodule update --init --recursive
|
|
848
849
|
|
|
849
850
|
# Build
|
|
850
851
|
CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
@@ -914,7 +915,8 @@ RUN <<EOF
|
|
|
914
915
|
git -C /tmp clone --recursive --shallow-submodules \
|
|
915
916
|
https://github.com/ppl-ai/pplx-kernels.git pplx-kernels \
|
|
916
917
|
&& pushd /tmp/pplx-kernels \
|
|
917
|
-
&& git checkout ${VLLM_PPLX_KERNEL_COMMIT}
|
|
918
|
+
&& git checkout ${VLLM_PPLX_KERNEL_COMMIT} \
|
|
919
|
+
&& git submodule update --init --recursive
|
|
918
920
|
|
|
919
921
|
# Build
|
|
920
922
|
CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
@@ -1315,6 +1317,9 @@ cuda-python==${CUDA_MAJOR}.${CUDA_MINOR}
|
|
|
1315
1317
|
pynvml==${CUDA_MAJOR}
|
|
1316
1318
|
nvidia-nvshmem-cu${CUDA_MAJOR}
|
|
1317
1319
|
nvshmem4py-cu${CUDA_MAJOR}
|
|
1320
|
+
nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
|
|
1321
|
+
nvidia-cudnn-frontend>=1.17.0
|
|
1322
|
+
nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
|
|
1318
1323
|
EOT
|
|
1319
1324
|
uv pip install \
|
|
1320
1325
|
-r /tmp/requirements.txt
|
|
@@ -1811,7 +1816,8 @@ EOF
|
|
|
1811
1816
|
|
|
1812
1817
|
## Entrypoint
|
|
1813
1818
|
|
|
1814
|
-
ENV
|
|
1819
|
+
ENV SGLANG_DISABLE_CUDNN_CHECK=1 \
|
|
1820
|
+
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 \
|
|
1815
1821
|
SGLANG_INT4_WEIGHT=0 \
|
|
1816
1822
|
SGLANG_MOE_PADDING=1 \
|
|
1817
1823
|
SGLANG_SET_CPU_AFFINITY=1
|