gpustack-runner 0.1.24.post2__tar.gz → 0.1.24.post3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/PKG-INFO +21 -21
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/README.md +20 -20
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.24.post3/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/runner.py.json +117 -7
- gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_sglang_reinstall_kernel/cann/Dockerfile +74 -0
- gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_sglang_reinstall_kernel/matrix.yaml +28 -0
- gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/cuda/Dockerfile +25 -0
- gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/cuda/patches/vllm_001_wrong_dp_ray.patch +41 -0
- gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/matrix.yaml +55 -0
- gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/rocm/Dockerfile +25 -0
- gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/rocm/patches/vllm_001_wrong_dp_ray.patch +41 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/README.md +2 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/Dockerfile +18 -3
- gpustack_runner-0.1.24.post3/pack/cann/patches/vllm_001_wrong_dp_ray.patch +41 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cuda/Dockerfile +23 -10
- gpustack_runner-0.1.24.post3/pack/cuda/patches/vllm_001_wrong_dp_ray.patch +41 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/matrix.yaml +17 -17
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/rocm/Dockerfile +22 -14
- gpustack_runner-0.1.24.post3/pack/rocm/patches/vllm_001_wrong_dp_ray.patch +41 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +117 -7
- gpustack_runner-0.1.24.post2/gpustack_runner/_version_appendix.py +0 -1
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.codespelldict +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.codespellrc +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.gitattributes +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.gitignore +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.python-version +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/LICENSE +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/Makefile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/docs/index.md +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/__utils__.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/cmds/images.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/envs.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/runner.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/hatch.toml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/mindie-atb-models_2.3.0_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/mindie-atb-models_2.3.0_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/patches/mindie.zip +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/corex/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/dtk/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/expand_matrix.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/hggc/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/maca/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/musa/Dockerfile +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/prune_runner.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/squash_expand_matrix.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/squash_image.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pyproject.toml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pytest.ini +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/ruff.toml +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_merge_image.json +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_replace_image_with.json +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_split_image.json +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/test_utils.py +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/activate +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/uv.lock +0 -0
- {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.24.post2
|
|
3
|
+
Version: 0.1.24.post3
|
|
4
4
|
Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -52,17 +52,17 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
52
52
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
53
53
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
54
54
|
|
|
55
|
-
| CANN Version <br/> (Variant) | MindIE | vLLM
|
|
56
|
-
|
|
57
|
-
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
58
|
-
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
59
|
-
| 8.5 (310P) | `2.3.0` | `0.14.1`
|
|
60
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
61
|
-
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
62
|
-
| 8.3 (310P) | `2.2.rc1` |
|
|
63
|
-
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`,
|
|
64
|
-
| 8.2 (910B) | `2.1.rc2` | `0.10.2`,
|
|
65
|
-
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2`
|
|
55
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
56
|
+
|------------------------------|-----------|--------------------------------------------------------------------|------------------------|
|
|
57
|
+
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
58
|
+
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
59
|
+
| 8.5 (310P) | `2.3.0` | `0.14.1` | |
|
|
60
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
61
|
+
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
62
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
63
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
|
|
64
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
|
|
65
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
66
66
|
|
|
67
67
|
### Iluvatar CoreX
|
|
68
68
|
|
|
@@ -80,11 +80,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
80
80
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
81
81
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
82
82
|
|
|
83
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
84
|
-
|
|
85
|
-
| 12.9 | `0.
|
|
86
|
-
| 12.8 | `0.
|
|
87
|
-
| 12.6 | `0.
|
|
83
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
84
|
+
|------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
|
|
85
|
+
| 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
|
|
86
|
+
| 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
|
|
87
|
+
| 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
|
|
88
88
|
|
|
89
89
|
### Hygon DTK
|
|
90
90
|
|
|
@@ -128,10 +128,10 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
128
128
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
129
129
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
130
130
|
|
|
131
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
132
|
-
|
|
133
|
-
| 7.0 |
|
|
134
|
-
| 6.4 |
|
|
131
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
132
|
+
|------------------------------|-------------------------------------------------|--------------------------------------------|
|
|
133
|
+
| 7.0 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
|
|
134
|
+
| 6.4 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
|
|
135
135
|
|
|
136
136
|
## Directory Structure
|
|
137
137
|
|
|
@@ -32,17 +32,17 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
32
32
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
33
33
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
34
34
|
|
|
35
|
-
| CANN Version <br/> (Variant) | MindIE | vLLM
|
|
36
|
-
|
|
37
|
-
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
38
|
-
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0`
|
|
39
|
-
| 8.5 (310P) | `2.3.0` | `0.14.1`
|
|
40
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
41
|
-
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0`
|
|
42
|
-
| 8.3 (310P) | `2.2.rc1` |
|
|
43
|
-
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`,
|
|
44
|
-
| 8.2 (910B) | `2.1.rc2` | `0.10.2`,
|
|
45
|
-
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2`
|
|
35
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
36
|
+
|------------------------------|-----------|--------------------------------------------------------------------|------------------------|
|
|
37
|
+
| 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
38
|
+
| 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
|
|
39
|
+
| 8.5 (310P) | `2.3.0` | `0.14.1` | |
|
|
40
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
41
|
+
| 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
42
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
43
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
|
|
44
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
|
|
45
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
46
46
|
|
|
47
47
|
### Iluvatar CoreX
|
|
48
48
|
|
|
@@ -60,11 +60,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
60
60
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
61
61
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
62
62
|
|
|
63
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
64
|
-
|
|
65
|
-
| 12.9 | `0.
|
|
66
|
-
| 12.8 | `0.
|
|
67
|
-
| 12.6 | `0.
|
|
63
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
64
|
+
|------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
|
|
65
|
+
| 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
|
|
66
|
+
| 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
|
|
67
|
+
| 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
|
|
68
68
|
|
|
69
69
|
### Hygon DTK
|
|
70
70
|
|
|
@@ -108,10 +108,10 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
108
108
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
109
109
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
110
110
|
|
|
111
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
112
|
-
|
|
113
|
-
| 7.0 |
|
|
114
|
-
| 6.4 |
|
|
111
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
112
|
+
|------------------------------|-------------------------------------------------|--------------------------------------------|
|
|
113
|
+
| 7.0 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
|
|
114
|
+
| 6.4 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
|
|
115
115
|
|
|
116
116
|
## Directory Structure
|
|
117
117
|
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.24.post2'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 24, 'post2')
|
|
30
|
+
__version__ = version = '0.1.24.post3'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 24, 'post3')
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "dc41ed2"
|
{gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/runner.py.json
RENAMED
|
@@ -261,7 +261,7 @@
|
|
|
261
261
|
"service_version": "0.10.1.1",
|
|
262
262
|
"platform": "linux/amd64",
|
|
263
263
|
"docker_image": "gpustack/runner:cann8.2-a3-vllm0.10.1.1",
|
|
264
|
-
"deprecated": false
|
|
264
|
+
"deprecated": true
|
|
265
265
|
},
|
|
266
266
|
{
|
|
267
267
|
"backend": "cann",
|
|
@@ -272,7 +272,7 @@
|
|
|
272
272
|
"service_version": "0.10.1.1",
|
|
273
273
|
"platform": "linux/arm64",
|
|
274
274
|
"docker_image": "gpustack/runner:cann8.2-a3-vllm0.10.1.1",
|
|
275
|
-
"deprecated": false
|
|
275
|
+
"deprecated": true
|
|
276
276
|
},
|
|
277
277
|
{
|
|
278
278
|
"backend": "cann",
|
|
@@ -558,7 +558,7 @@
|
|
|
558
558
|
"service_version": "0.10.1.1",
|
|
559
559
|
"platform": "linux/amd64",
|
|
560
560
|
"docker_image": "gpustack/runner:cann8.2-910b-vllm0.10.1.1",
|
|
561
|
-
"deprecated": false
|
|
561
|
+
"deprecated": true
|
|
562
562
|
},
|
|
563
563
|
{
|
|
564
564
|
"backend": "cann",
|
|
@@ -569,7 +569,7 @@
|
|
|
569
569
|
"service_version": "0.10.1.1",
|
|
570
570
|
"platform": "linux/arm64",
|
|
571
571
|
"docker_image": "gpustack/runner:cann8.2-910b-vllm0.10.1.1",
|
|
572
|
-
"deprecated": false
|
|
572
|
+
"deprecated": true
|
|
573
573
|
},
|
|
574
574
|
{
|
|
575
575
|
"backend": "cann",
|
|
@@ -624,7 +624,7 @@
|
|
|
624
624
|
"service_version": "0.9.1",
|
|
625
625
|
"platform": "linux/amd64",
|
|
626
626
|
"docker_image": "gpustack/runner:cann8.2-910b-vllm0.9.1",
|
|
627
|
-
"deprecated": false
|
|
627
|
+
"deprecated": true
|
|
628
628
|
},
|
|
629
629
|
{
|
|
630
630
|
"backend": "cann",
|
|
@@ -635,7 +635,7 @@
|
|
|
635
635
|
"service_version": "0.9.1",
|
|
636
636
|
"platform": "linux/arm64",
|
|
637
637
|
"docker_image": "gpustack/runner:cann8.2-910b-vllm0.9.1",
|
|
638
|
-
"deprecated": false
|
|
638
|
+
"deprecated": true
|
|
639
639
|
},
|
|
640
640
|
{
|
|
641
641
|
"backend": "cann",
|
|
@@ -802,6 +802,28 @@
|
|
|
802
802
|
"docker_image": "gpustack/runner:corex4.2-vllm0.8.3",
|
|
803
803
|
"deprecated": false
|
|
804
804
|
},
|
|
805
|
+
{
|
|
806
|
+
"backend": "cuda",
|
|
807
|
+
"backend_version": "12.9",
|
|
808
|
+
"original_backend_version": "12.9.1",
|
|
809
|
+
"backend_variant": "",
|
|
810
|
+
"service": "sglang",
|
|
811
|
+
"service_version": "0.5.8",
|
|
812
|
+
"platform": "linux/amd64",
|
|
813
|
+
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.8",
|
|
814
|
+
"deprecated": false
|
|
815
|
+
},
|
|
816
|
+
{
|
|
817
|
+
"backend": "cuda",
|
|
818
|
+
"backend_version": "12.9",
|
|
819
|
+
"original_backend_version": "12.9.1",
|
|
820
|
+
"backend_variant": "",
|
|
821
|
+
"service": "sglang",
|
|
822
|
+
"service_version": "0.5.8",
|
|
823
|
+
"platform": "linux/arm64",
|
|
824
|
+
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.8",
|
|
825
|
+
"deprecated": false
|
|
826
|
+
},
|
|
805
827
|
{
|
|
806
828
|
"backend": "cuda",
|
|
807
829
|
"backend_version": "12.9",
|
|
@@ -846,6 +868,28 @@
|
|
|
846
868
|
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.6.post2",
|
|
847
869
|
"deprecated": false
|
|
848
870
|
},
|
|
871
|
+
{
|
|
872
|
+
"backend": "cuda",
|
|
873
|
+
"backend_version": "12.9",
|
|
874
|
+
"original_backend_version": "12.9.1",
|
|
875
|
+
"backend_variant": "",
|
|
876
|
+
"service": "vllm",
|
|
877
|
+
"service_version": "0.14.1",
|
|
878
|
+
"platform": "linux/amd64",
|
|
879
|
+
"docker_image": "gpustack/runner:cuda12.9-vllm0.14.1",
|
|
880
|
+
"deprecated": false
|
|
881
|
+
},
|
|
882
|
+
{
|
|
883
|
+
"backend": "cuda",
|
|
884
|
+
"backend_version": "12.9",
|
|
885
|
+
"original_backend_version": "12.9.1",
|
|
886
|
+
"backend_variant": "",
|
|
887
|
+
"service": "vllm",
|
|
888
|
+
"service_version": "0.14.1",
|
|
889
|
+
"platform": "linux/arm64",
|
|
890
|
+
"docker_image": "gpustack/runner:cuda12.9-vllm0.14.1",
|
|
891
|
+
"deprecated": false
|
|
892
|
+
},
|
|
849
893
|
{
|
|
850
894
|
"backend": "cuda",
|
|
851
895
|
"backend_version": "12.9",
|
|
@@ -912,6 +956,28 @@
|
|
|
912
956
|
"docker_image": "gpustack/runner:cuda12.9-vllm0.11.2",
|
|
913
957
|
"deprecated": false
|
|
914
958
|
},
|
|
959
|
+
{
|
|
960
|
+
"backend": "cuda",
|
|
961
|
+
"backend_version": "12.8",
|
|
962
|
+
"original_backend_version": "12.8.1",
|
|
963
|
+
"backend_variant": "",
|
|
964
|
+
"service": "sglang",
|
|
965
|
+
"service_version": "0.5.8",
|
|
966
|
+
"platform": "linux/amd64",
|
|
967
|
+
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.8",
|
|
968
|
+
"deprecated": false
|
|
969
|
+
},
|
|
970
|
+
{
|
|
971
|
+
"backend": "cuda",
|
|
972
|
+
"backend_version": "12.8",
|
|
973
|
+
"original_backend_version": "12.8.1",
|
|
974
|
+
"backend_variant": "",
|
|
975
|
+
"service": "sglang",
|
|
976
|
+
"service_version": "0.5.8",
|
|
977
|
+
"platform": "linux/arm64",
|
|
978
|
+
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.8",
|
|
979
|
+
"deprecated": false
|
|
980
|
+
},
|
|
915
981
|
{
|
|
916
982
|
"backend": "cuda",
|
|
917
983
|
"backend_version": "12.8",
|
|
@@ -998,7 +1064,7 @@
|
|
|
998
1064
|
"service_version": "0.5.4.post3",
|
|
999
1065
|
"platform": "linux/amd64",
|
|
1000
1066
|
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
|
|
1001
|
-
"deprecated": false
|
|
1067
|
+
"deprecated": true
|
|
1002
1068
|
},
|
|
1003
1069
|
{
|
|
1004
1070
|
"backend": "cuda",
|
|
@@ -1009,6 +1075,28 @@
|
|
|
1009
1075
|
"service_version": "0.5.4.post3",
|
|
1010
1076
|
"platform": "linux/arm64",
|
|
1011
1077
|
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
|
|
1078
|
+
"deprecated": true
|
|
1079
|
+
},
|
|
1080
|
+
{
|
|
1081
|
+
"backend": "cuda",
|
|
1082
|
+
"backend_version": "12.8",
|
|
1083
|
+
"original_backend_version": "12.8.1",
|
|
1084
|
+
"backend_variant": "",
|
|
1085
|
+
"service": "vllm",
|
|
1086
|
+
"service_version": "0.14.1",
|
|
1087
|
+
"platform": "linux/amd64",
|
|
1088
|
+
"docker_image": "gpustack/runner:cuda12.8-vllm0.14.1",
|
|
1089
|
+
"deprecated": false
|
|
1090
|
+
},
|
|
1091
|
+
{
|
|
1092
|
+
"backend": "cuda",
|
|
1093
|
+
"backend_version": "12.8",
|
|
1094
|
+
"original_backend_version": "12.8.1",
|
|
1095
|
+
"backend_variant": "",
|
|
1096
|
+
"service": "vllm",
|
|
1097
|
+
"service_version": "0.14.1",
|
|
1098
|
+
"platform": "linux/arm64",
|
|
1099
|
+
"docker_image": "gpustack/runner:cuda12.8-vllm0.14.1",
|
|
1012
1100
|
"deprecated": false
|
|
1013
1101
|
},
|
|
1014
1102
|
{
|
|
@@ -1209,6 +1297,28 @@
|
|
|
1209
1297
|
"docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
|
|
1210
1298
|
"deprecated": true
|
|
1211
1299
|
},
|
|
1300
|
+
{
|
|
1301
|
+
"backend": "cuda",
|
|
1302
|
+
"backend_version": "12.6",
|
|
1303
|
+
"original_backend_version": "12.6.3",
|
|
1304
|
+
"backend_variant": "",
|
|
1305
|
+
"service": "vllm",
|
|
1306
|
+
"service_version": "0.14.1",
|
|
1307
|
+
"platform": "linux/amd64",
|
|
1308
|
+
"docker_image": "gpustack/runner:cuda12.6-vllm0.14.1",
|
|
1309
|
+
"deprecated": false
|
|
1310
|
+
},
|
|
1311
|
+
{
|
|
1312
|
+
"backend": "cuda",
|
|
1313
|
+
"backend_version": "12.6",
|
|
1314
|
+
"original_backend_version": "12.6.3",
|
|
1315
|
+
"backend_variant": "",
|
|
1316
|
+
"service": "vllm",
|
|
1317
|
+
"service_version": "0.14.1",
|
|
1318
|
+
"platform": "linux/arm64",
|
|
1319
|
+
"docker_image": "gpustack/runner:cuda12.6-vllm0.14.1",
|
|
1320
|
+
"deprecated": false
|
|
1321
|
+
},
|
|
1212
1322
|
{
|
|
1213
1323
|
"backend": "cuda",
|
|
1214
1324
|
"backend_version": "12.6",
|
gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_sglang_reinstall_kernel/cann/Dockerfile
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG CANN_VERSION=8.3
|
|
3
|
+
ARG CANN_ARCHS=910b
|
|
4
|
+
ARG SGLANG_VERSION=0.12.0
|
|
5
|
+
ARG SGLANG_KERNEL_VERSION=20251206
|
|
6
|
+
|
|
7
|
+
FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-sglang${SGLANG_VERSION} AS sglang
|
|
8
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
9
|
+
|
|
10
|
+
ARG TARGETPLATFORM
|
|
11
|
+
ARG TARGETOS
|
|
12
|
+
ARG TARGETARCH
|
|
13
|
+
|
|
14
|
+
## Reinstall SGLang Kernel
|
|
15
|
+
|
|
16
|
+
ARG CMAKE_MAX_JOBS
|
|
17
|
+
ARG SGLANG_VERSION
|
|
18
|
+
ARG SGLANG_KERNEL_VERSION
|
|
19
|
+
|
|
20
|
+
ENV SGLANG_VERSION=${SGLANG_VERSION} \
|
|
21
|
+
SGLANG_KERNEL_VERSION=${SGLANG_KERNEL_VERSION}
|
|
22
|
+
|
|
23
|
+
RUN <<EOF
|
|
24
|
+
# SGLang
|
|
25
|
+
|
|
26
|
+
CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
27
|
+
if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
|
|
28
|
+
CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
|
|
29
|
+
fi
|
|
30
|
+
if (( $(echo "${CMAKE_MAX_JOBS} > 8" | bc -l) )); then
|
|
31
|
+
CMAKE_MAX_JOBS="8"
|
|
32
|
+
fi
|
|
33
|
+
export MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
34
|
+
export COMPILE_CUSTOM_KERNELS=1
|
|
35
|
+
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CANN_HOME}/ascend-toolkit/latest/$(uname -i)-linux/devlib"
|
|
36
|
+
export LD_LIBRARY_PATH="${CANN_HOME}/ascend-toolkit/latest/runtime/lib64/stub:${LD_LIBRARY_PATH}"
|
|
37
|
+
source ${CANN_HOME}/ascend-toolkit/set_env.sh
|
|
38
|
+
echo "Building SGLang with the following environment variables:"
|
|
39
|
+
env
|
|
40
|
+
|
|
41
|
+
# Install Dependencies
|
|
42
|
+
cat <<EOT >/tmp/requirements.txt
|
|
43
|
+
attrs==25.4.0
|
|
44
|
+
decorator==5.2.1
|
|
45
|
+
psutil==7.1.3
|
|
46
|
+
pyyaml==6.0.3
|
|
47
|
+
triton-ascend==3.2.0
|
|
48
|
+
EOT
|
|
49
|
+
uv pip install \
|
|
50
|
+
-r /tmp/requirements.txt
|
|
51
|
+
|
|
52
|
+
# Build and Install SGLang Kernel
|
|
53
|
+
git -C /tmp clone --recursive --shallow-submodules \
|
|
54
|
+
--depth 1 --branch ${SGLANG_KERNEL_VERSION} --single-branch \
|
|
55
|
+
https://github.com/sgl-project/sgl-kernel-npu.git sgl-kernel-npu
|
|
56
|
+
unset ASCEND_HOME_PATH
|
|
57
|
+
pushd /tmp/sgl-kernel-npu \
|
|
58
|
+
&& ./build.sh \
|
|
59
|
+
&& tree -hs /tmp/sgl-kernel-npu/output \
|
|
60
|
+
&& uv pip install /tmp/sgl-kernel-npu/output/deep_ep*.whl /tmp/sgl-kernel-npu/output/sgl_kernel_npu*.whl
|
|
61
|
+
|
|
62
|
+
# Postprocess SGLang Kernel (DeepEP)
|
|
63
|
+
cd "$(pip show deep-ep | awk '/^Location:/ {print $2}')" && ln -sf deep_ep/deep_ep_cpp*.so
|
|
64
|
+
|
|
65
|
+
# Cleanup
|
|
66
|
+
rm -rf /var/tmp/* \
|
|
67
|
+
&& rm -rf /tmp/* \
|
|
68
|
+
&& ccache --clear --clean
|
|
69
|
+
EOF
|
|
70
|
+
|
|
71
|
+
## Entrypoint
|
|
72
|
+
|
|
73
|
+
WORKDIR /
|
|
74
|
+
ENTRYPOINT [ "tini", "--" ]
|
gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_sglang_reinstall_kernel/matrix.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
rules:
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Ascend CANN
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
## Packed Ascend CANN 8.3, using CANN Kernel for A3.
|
|
8
|
+
##
|
|
9
|
+
- backend: "cann"
|
|
10
|
+
services:
|
|
11
|
+
- "sglang"
|
|
12
|
+
platforms:
|
|
13
|
+
- "linux/arm64"
|
|
14
|
+
args:
|
|
15
|
+
- "CANN_VERSION=8.3"
|
|
16
|
+
- "CANN_ARCHS=a3"
|
|
17
|
+
- "SGLANG_VERSION=0.5.7"
|
|
18
|
+
## Packed Ascend CANN 8.3, using CANN Kernel for 910B.
|
|
19
|
+
##
|
|
20
|
+
- backend: "cann"
|
|
21
|
+
services:
|
|
22
|
+
- "sglang"
|
|
23
|
+
platforms:
|
|
24
|
+
- "linux/arm64"
|
|
25
|
+
args:
|
|
26
|
+
- "CANN_VERSION=8.3"
|
|
27
|
+
- "CANN_ARCHS=910b"
|
|
28
|
+
- "SGLANG_VERSION=0.5.7"
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG CUDA_VERSION=12.8
|
|
3
|
+
ARG VLLM_VERSION=0.13.0
|
|
4
|
+
|
|
5
|
+
FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
|
|
6
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
7
|
+
|
|
8
|
+
ARG TARGETPLATFORM
|
|
9
|
+
ARG TARGETOS
|
|
10
|
+
ARG TARGETARCH
|
|
11
|
+
|
|
12
|
+
## Patch
|
|
13
|
+
|
|
14
|
+
RUN --mount=type=bind,target=/workspace,rw <<EOF
|
|
15
|
+
# Patch
|
|
16
|
+
|
|
17
|
+
tree -hs /workspace/patches
|
|
18
|
+
pushd $(pip show vllm | grep Location: | cut -d" " -f 2) \
|
|
19
|
+
&& patch -p1 < /workspace/patches/vllm_*.patch
|
|
20
|
+
EOF
|
|
21
|
+
|
|
22
|
+
## Entrypoint
|
|
23
|
+
|
|
24
|
+
WORKDIR /
|
|
25
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
diff --git a/vllm/utils/network_utils.py b/vllm/utils/network_utils.py
|
|
2
|
+
index 7d01533cb..311ed44df 100644
|
|
3
|
+
--- a/vllm/utils/network_utils.py
|
|
4
|
+
+++ b/vllm/utils/network_utils.py
|
|
5
|
+
@@ -147,6 +147,9 @@ def get_open_zmq_inproc_path() -> str:
|
|
6
|
+
return f"inproc://{uuid4()}"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
+_next_port: int | None = None
|
|
10
|
+
+
|
|
11
|
+
+
|
|
12
|
+
def get_open_port() -> int:
|
|
13
|
+
"""
|
|
14
|
+
Get an open port for the vLLM process to listen on.
|
|
15
|
+
@@ -163,7 +166,7 @@ def get_open_port() -> int:
|
|
16
|
+
candidate_port = _get_open_port()
|
|
17
|
+
if candidate_port not in reserved_port_range:
|
|
18
|
+
return candidate_port
|
|
19
|
+
- return _get_open_port()
|
|
20
|
+
+ return _get_open_port(_next_port)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_open_ports_list(count: int = 5) -> list[int]:
|
|
24
|
+
@@ -174,13 +177,15 @@ def get_open_ports_list(count: int = 5) -> list[int]:
|
|
25
|
+
return list(ports)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
-def _get_open_port() -> int:
|
|
29
|
+
- port = envs.VLLM_PORT
|
|
30
|
+
+def _get_open_port(start: int | None = None) -> int:
|
|
31
|
+
+ port = start or envs.VLLM_PORT
|
|
32
|
+
if port is not None:
|
|
33
|
+
+ global _next_port
|
|
34
|
+
while True:
|
|
35
|
+
try:
|
|
36
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
37
|
+
s.bind(("", port))
|
|
38
|
+
+ _next_port = port + 1
|
|
39
|
+
return port
|
|
40
|
+
except OSError:
|
|
41
|
+
port += 1 # Increment port number if already in use
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
rules:
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# NVIDIA CUDA
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
## Packed NVIDIA CUDA 12.9.
|
|
8
|
+
##
|
|
9
|
+
- backend: "cuda"
|
|
10
|
+
services:
|
|
11
|
+
- "vllm"
|
|
12
|
+
args:
|
|
13
|
+
- "CUDA_VERSION=12.9"
|
|
14
|
+
- "VLLM_VERSION=0.13.0"
|
|
15
|
+
## Packed NVIDIA CUDA 12.8.
|
|
16
|
+
##
|
|
17
|
+
- backend: "cuda"
|
|
18
|
+
services:
|
|
19
|
+
- "vllm"
|
|
20
|
+
args:
|
|
21
|
+
- "CUDA_VERSION=12.8"
|
|
22
|
+
- "VLLM_VERSION=0.13.0"
|
|
23
|
+
## Packed NVIDIA CUDA 12.6.
|
|
24
|
+
##
|
|
25
|
+
- backend: "cuda"
|
|
26
|
+
services:
|
|
27
|
+
- "vllm"
|
|
28
|
+
args:
|
|
29
|
+
- "CUDA_VERSION=12.6"
|
|
30
|
+
- "VLLM_VERSION=0.13.0"
|
|
31
|
+
|
|
32
|
+
#
|
|
33
|
+
# AMD ROCm
|
|
34
|
+
#
|
|
35
|
+
|
|
36
|
+
## Packed AMD ROCm 7.0.
|
|
37
|
+
##
|
|
38
|
+
- backend: "rocm"
|
|
39
|
+
services:
|
|
40
|
+
- "vllm"
|
|
41
|
+
platforms:
|
|
42
|
+
- "linux/amd64"
|
|
43
|
+
args:
|
|
44
|
+
- "ROCM_VERSION=7.0"
|
|
45
|
+
- "VLLM_VERSION=0.13.0"
|
|
46
|
+
## Packed AMD ROCm 6.4.
|
|
47
|
+
##
|
|
48
|
+
- backend: "rocm"
|
|
49
|
+
services:
|
|
50
|
+
- "vllm"
|
|
51
|
+
platforms:
|
|
52
|
+
- "linux/amd64"
|
|
53
|
+
args:
|
|
54
|
+
- "ROCM_VERSION=6.4"
|
|
55
|
+
- "VLLM_VERSION=0.13.0"
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG ROCM_VERSION=6.4
|
|
3
|
+
ARG VLLM_VERSION=0.13.0
|
|
4
|
+
|
|
5
|
+
FROM gpustack/runner:rocm${ROCM_VERSION}-vllm${VLLM_VERSION} AS vllm
|
|
6
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
7
|
+
|
|
8
|
+
ARG TARGETPLATFORM
|
|
9
|
+
ARG TARGETOS
|
|
10
|
+
ARG TARGETARCH
|
|
11
|
+
|
|
12
|
+
## Patch
|
|
13
|
+
|
|
14
|
+
RUN --mount=type=bind,target=/workspace,rw <<EOF
|
|
15
|
+
# Patch
|
|
16
|
+
|
|
17
|
+
tree -hs /workspace/patches
|
|
18
|
+
pushd $(pip show vllm | grep Location: | cut -d" " -f 2) \
|
|
19
|
+
&& patch -p1 < /workspace/patches/vllm_*.patch
|
|
20
|
+
EOF
|
|
21
|
+
|
|
22
|
+
## Entrypoint
|
|
23
|
+
|
|
24
|
+
WORKDIR /
|
|
25
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
diff --git a/vllm/utils/network_utils.py b/vllm/utils/network_utils.py
|
|
2
|
+
index 7d01533cb..311ed44df 100644
|
|
3
|
+
--- a/vllm/utils/network_utils.py
|
|
4
|
+
+++ b/vllm/utils/network_utils.py
|
|
5
|
+
@@ -147,6 +147,9 @@ def get_open_zmq_inproc_path() -> str:
|
|
6
|
+
return f"inproc://{uuid4()}"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
+_next_port: int | None = None
|
|
10
|
+
+
|
|
11
|
+
+
|
|
12
|
+
def get_open_port() -> int:
|
|
13
|
+
"""
|
|
14
|
+
Get an open port for the vLLM process to listen on.
|
|
15
|
+
@@ -163,7 +166,7 @@ def get_open_port() -> int:
|
|
16
|
+
candidate_port = _get_open_port()
|
|
17
|
+
if candidate_port not in reserved_port_range:
|
|
18
|
+
return candidate_port
|
|
19
|
+
- return _get_open_port()
|
|
20
|
+
+ return _get_open_port(_next_port)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_open_ports_list(count: int = 5) -> list[int]:
|
|
24
|
+
@@ -174,13 +177,15 @@ def get_open_ports_list(count: int = 5) -> list[int]:
|
|
25
|
+
return list(ports)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
-def _get_open_port() -> int:
|
|
29
|
+
- port = envs.VLLM_PORT
|
|
30
|
+
+def _get_open_port(start: int | None = None) -> int:
|
|
31
|
+
+ port = start or envs.VLLM_PORT
|
|
32
|
+
if port is not None:
|
|
33
|
+
+ global _next_port
|
|
34
|
+
while True:
|
|
35
|
+
try:
|
|
36
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
37
|
+
s.bind(("", port))
|
|
38
|
+
+ _next_port = port + 1
|
|
39
|
+
return port
|
|
40
|
+
except OSError:
|
|
41
|
+
port += 1 # Increment port number if already in use
|
{gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/README.md
RENAMED
|
@@ -33,3 +33,5 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
|
|
|
33
33
|
- [x] 2025-12-19: Install `petit-kernel` package for vLLM 0.12.0/0.11.2 and SGLang 0.5.6.post2/0.5.5.post3 of ROCm released images.
|
|
34
34
|
- [x] 2025-12-24: Apply ATB config patches to MindIE 2.2.rc1 for CANN released images.
|
|
35
35
|
- [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.
|
|
36
|
+
- [x] 2026-01-29: Apply DP deployment patches to vLLM 0.13.0 for CUDA/ROCm released images.
|
|
37
|
+
- [x] 2026-01-29: Reinstall SGLang Kernel for SGLang 0.5.7 of CANN released images.
|