gpustack-runner 0.1.22.post3__tar.gz → 0.1.22.post5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/PKG-INFO +13 -12
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/README.md +12 -11
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.22.post5/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/runner.py.json +44 -0
- gpustack_runner-0.1.22.post5/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +25 -0
- gpustack_runner-0.1.22.post5/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +33 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/README.md +1 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/Dockerfile +3 -1
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cuda/Dockerfile +1 -1
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/matrix.yaml +0 -1
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +44 -0
- gpustack_runner-0.1.22.post3/gpustack_runner/_version_appendix.py +0 -1
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.codespelldict +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.codespellrc +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.gitattributes +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.gitignore +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.python-version +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/LICENSE +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/Makefile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/docs/index.md +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/images.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/runner.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/hatch.toml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/patches/mindie.zip +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/corex/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/dtk/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/expand_matrix.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/maca/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/prune_runner.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pyproject.toml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pytest.ini +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/ruff.toml +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/activate +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/uv.lock +0 -0
- {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.22.post3
|
|
3
|
+
Version: 0.1.22.post5
|
|
4
4
|
Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -54,6 +54,7 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
54
54
|
|
|
55
55
|
> [!IMPORTANT]
|
|
56
56
|
> - Applied [ATB model patched](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3646603380) to MindIE 2.2.rc1/2.1.rc2.
|
|
57
|
+
> - Applied [ATB config patched](https://github.com/gpustack/gpustack/issues/3551) to MindIE 2.2.rc1.
|
|
57
58
|
> - Applied [av package](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3631228085) to MindIE 2.2.rc1/2.1.rc2.
|
|
58
59
|
> - Update vLLM 0.11.0 with stable vLLM Ascend plugin.
|
|
59
60
|
|
|
@@ -86,12 +87,12 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
86
87
|
> - Applied [Qwen2.5 VL patched](https://github.com/gpustack/gpustack/issues/3606) to vLLM 0.11.2.
|
|
87
88
|
> - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
|
|
88
89
|
|
|
89
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
90
|
-
|
|
91
|
-
| 12.9 | `0.13.0`, `0.12.0`,
|
|
92
|
-
| 12.8 | `0.13.0`, `0.12.0`,
|
|
93
|
-
| 12.6 | `0.13.0`, `0.12.0`,
|
|
94
|
-
| 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0`
|
|
90
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
91
|
+
|------------------------------|-------------------------------------------------------------------------------------------|-----------------------------------------------------------|--------------------|
|
|
92
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
|
|
93
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
|
|
94
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
|
|
95
|
+
| 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
|
|
95
96
|
|
|
96
97
|
### Hygon DTK
|
|
97
98
|
|
|
@@ -124,11 +125,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
124
125
|
> - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
|
|
125
126
|
> - Applied [petit-kernel package](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L728) to vLLM 0.11.2 and SGLang 0.5.5.post3.
|
|
126
127
|
|
|
127
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
128
|
-
|
|
129
|
-
| 7.0 | `0.13.0`, `0.12.0`,
|
|
130
|
-
| 6.4 | `0.13.0`, `0.12.0`,
|
|
131
|
-
| 6.3 | `0.10.1.1`, `0.10.0`
|
|
128
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
129
|
+
|------------------------------|-------------------------------------------------|----------------------------------|
|
|
130
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0` | `0.5.6.post2` |
|
|
131
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
|
|
132
|
+
| 6.3 | `0.10.1.1`, `0.10.0` | |
|
|
132
133
|
|
|
133
134
|
## Directory Structure
|
|
134
135
|
|
|
@@ -34,6 +34,7 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
34
34
|
|
|
35
35
|
> [!IMPORTANT]
|
|
36
36
|
> - Applied [ATB model patched](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3646603380) to MindIE 2.2.rc1/2.1.rc2.
|
|
37
|
+
> - Applied [ATB config patched](https://github.com/gpustack/gpustack/issues/3551) to MindIE 2.2.rc1.
|
|
37
38
|
> - Applied [av package](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3631228085) to MindIE 2.2.rc1/2.1.rc2.
|
|
38
39
|
> - Update vLLM 0.11.0 with stable vLLM Ascend plugin.
|
|
39
40
|
|
|
@@ -66,12 +67,12 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
66
67
|
> - Applied [Qwen2.5 VL patched](https://github.com/gpustack/gpustack/issues/3606) to vLLM 0.11.2.
|
|
67
68
|
> - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
|
|
68
69
|
|
|
69
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
70
|
-
|
|
71
|
-
| 12.9 | `0.13.0`, `0.12.0`,
|
|
72
|
-
| 12.8 | `0.13.0`, `0.12.0`,
|
|
73
|
-
| 12.6 | `0.13.0`, `0.12.0`,
|
|
74
|
-
| 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0`
|
|
70
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
71
|
+
|------------------------------|-------------------------------------------------------------------------------------------|-----------------------------------------------------------|--------------------|
|
|
72
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
|
|
73
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
|
|
74
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
|
|
75
|
+
| 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
|
|
75
76
|
|
|
76
77
|
### Hygon DTK
|
|
77
78
|
|
|
@@ -104,11 +105,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
104
105
|
> - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
|
|
105
106
|
> - Applied [petit-kernel package](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L728) to vLLM 0.11.2 and SGLang 0.5.5.post3.
|
|
106
107
|
|
|
107
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
108
|
-
|
|
109
|
-
| 7.0 | `0.13.0`, `0.12.0`,
|
|
110
|
-
| 6.4 | `0.13.0`, `0.12.0`,
|
|
111
|
-
| 6.3 | `0.10.1.1`, `0.10.0`
|
|
108
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
109
|
+
|------------------------------|-------------------------------------------------|----------------------------------|
|
|
110
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0` | `0.5.6.post2` |
|
|
111
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
|
|
112
|
+
| 6.3 | `0.10.1.1`, `0.10.0` | |
|
|
112
113
|
|
|
113
114
|
## Directory Structure
|
|
114
115
|
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.22.post3'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 22, 'post3')
|
|
30
|
+
__version__ = version = '0.1.22.post5'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 22, 'post5')
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "18bd835"
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/runner.py.json
RENAMED
|
@@ -901,6 +901,28 @@
|
|
|
901
901
|
"docker_image": "gpustack/runner:cuda12.8-vllm0.10.0",
|
|
902
902
|
"deprecated": false
|
|
903
903
|
},
|
|
904
|
+
{
|
|
905
|
+
"backend": "cuda",
|
|
906
|
+
"backend_version": "12.8",
|
|
907
|
+
"original_backend_version": "12.8.1",
|
|
908
|
+
"backend_variant": "",
|
|
909
|
+
"service": "voxbox",
|
|
910
|
+
"service_version": "0.0.21",
|
|
911
|
+
"platform": "linux/amd64",
|
|
912
|
+
"docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
|
|
913
|
+
"deprecated": false
|
|
914
|
+
},
|
|
915
|
+
{
|
|
916
|
+
"backend": "cuda",
|
|
917
|
+
"backend_version": "12.8",
|
|
918
|
+
"original_backend_version": "12.8.1",
|
|
919
|
+
"backend_variant": "",
|
|
920
|
+
"service": "voxbox",
|
|
921
|
+
"service_version": "0.0.21",
|
|
922
|
+
"platform": "linux/arm64",
|
|
923
|
+
"docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
|
|
924
|
+
"deprecated": false
|
|
925
|
+
},
|
|
904
926
|
{
|
|
905
927
|
"backend": "cuda",
|
|
906
928
|
"backend_version": "12.8",
|
|
@@ -1077,6 +1099,28 @@
|
|
|
1077
1099
|
"docker_image": "gpustack/runner:cuda12.6-vllm0.10.0",
|
|
1078
1100
|
"deprecated": false
|
|
1079
1101
|
},
|
|
1102
|
+
{
|
|
1103
|
+
"backend": "cuda",
|
|
1104
|
+
"backend_version": "12.6",
|
|
1105
|
+
"original_backend_version": "12.6.3",
|
|
1106
|
+
"backend_variant": "",
|
|
1107
|
+
"service": "voxbox",
|
|
1108
|
+
"service_version": "0.0.21",
|
|
1109
|
+
"platform": "linux/amd64",
|
|
1110
|
+
"docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
|
|
1111
|
+
"deprecated": false
|
|
1112
|
+
},
|
|
1113
|
+
{
|
|
1114
|
+
"backend": "cuda",
|
|
1115
|
+
"backend_version": "12.6",
|
|
1116
|
+
"original_backend_version": "12.6.3",
|
|
1117
|
+
"backend_variant": "",
|
|
1118
|
+
"service": "voxbox",
|
|
1119
|
+
"service_version": "0.0.21",
|
|
1120
|
+
"platform": "linux/arm64",
|
|
1121
|
+
"docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
|
|
1122
|
+
"deprecated": false
|
|
1123
|
+
},
|
|
1080
1124
|
{
|
|
1081
1125
|
"backend": "cuda",
|
|
1082
1126
|
"backend_version": "12.6",
|
gpustack_runner-0.1.22.post5/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG CANN_VERSION=8.3
|
|
3
|
+
ARG CANN_ARCHS=910b
|
|
4
|
+
ARG MINDIE_VERSION=2.2.rc1
|
|
5
|
+
|
|
6
|
+
FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-mindie${MINDIE_VERSION} AS mindie
|
|
7
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
8
|
+
|
|
9
|
+
ARG TARGETPLATFORM
|
|
10
|
+
ARG TARGETOS
|
|
11
|
+
ARG TARGETARCH
|
|
12
|
+
|
|
13
|
+
## Patch ATB Config
|
|
14
|
+
|
|
15
|
+
RUN <<EOF
|
|
16
|
+
# Patch ATB Config
|
|
17
|
+
|
|
18
|
+
sed -i "s/\"ep_level\": 2/\"ep_level\": 1/g" ${CANN_HOME}/atb-models/atb_llm/conf/config.json
|
|
19
|
+
|
|
20
|
+
EOF
|
|
21
|
+
|
|
22
|
+
## Entrypoint
|
|
23
|
+
|
|
24
|
+
WORKDIR /
|
|
25
|
+
ENTRYPOINT [ "tini", "--" ]
|
gpustack_runner-0.1.22.post5/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
rules:
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Ascend CANN
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
## Packed Ascend CANN 8.3, using CANN Kernel for A3.
|
|
8
|
+
##
|
|
9
|
+
- backend: "cann"
|
|
10
|
+
services:
|
|
11
|
+
- "mindie"
|
|
12
|
+
args:
|
|
13
|
+
- "CANN_VERSION=8.3"
|
|
14
|
+
- "CANN_ARCHS=a3"
|
|
15
|
+
- "MINDIE_VERSION=2.2.rc1"
|
|
16
|
+
## Packed Ascend CANN 8.3, using CANN Kernel for 910B.
|
|
17
|
+
##
|
|
18
|
+
- backend: "cann"
|
|
19
|
+
services:
|
|
20
|
+
- "mindie"
|
|
21
|
+
args:
|
|
22
|
+
- "CANN_VERSION=8.3"
|
|
23
|
+
- "CANN_ARCHS=910b"
|
|
24
|
+
- "MINDIE_VERSION=2.2.rc1"
|
|
25
|
+
## Packed Ascend CANN 8.3, using CANN Kernel for 310P.
|
|
26
|
+
##
|
|
27
|
+
- backend: "cann"
|
|
28
|
+
services:
|
|
29
|
+
- "mindie"
|
|
30
|
+
args:
|
|
31
|
+
- "CANN_VERSION=8.3"
|
|
32
|
+
- "CANN_ARCHS=310p"
|
|
33
|
+
- "MINDIE_VERSION=2.2.rc1"
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/README.md
RENAMED
|
@@ -31,3 +31,4 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
|
|
|
31
31
|
- [x] 2025-12-16: Uninstall `runai-model-streamer` packages from SGLang 0.5.6.post2 for CUDA released images.
|
|
32
32
|
- [x] 2025-12-19: Install `vLLM[audio]` packages for vLLM 0.12.0/0.11.2 of CUDA/ROCm released images.
|
|
33
33
|
- [x] 2025-12-19: Install `petit-kernel` package for vLLM 0.12.0/0.11.2 and SGLang 0.5.6.post2/0.5.5.post3 of ROcm released images.
|
|
34
|
+
- [x] 2025-12-24: Apply ATB config patches to MindIE 2.2.rc1 for CANN released images.
|
|
@@ -583,8 +583,10 @@ RUN --mount=type=bind,target=/workspace,rw <<EOF
|
|
|
583
583
|
# Patch
|
|
584
584
|
|
|
585
585
|
unzip /workspace/patches/mindie.zip -d /workspace/patches
|
|
586
|
-
pushd /
|
|
586
|
+
pushd ${CANN_HOME}/atb-models \
|
|
587
587
|
&& patch -p1 < /workspace/patches/mindie/*.patch
|
|
588
|
+
|
|
589
|
+
sed -i "s/\"ep_level\": 2/\"ep_level\": 1/g" ${CANN_HOME}/atb-models/atb_llm/conf/config.json
|
|
588
590
|
EOF
|
|
589
591
|
|
|
590
592
|
## Postprocess
|
|
@@ -95,7 +95,7 @@ ARG CMAKE_MAX_JOBS
|
|
|
95
95
|
ARG CUDA_VERSION=12.9.1
|
|
96
96
|
ARG CUDA_ARCHS
|
|
97
97
|
ARG VOXBOX_BASE_IMAGE=gpustack/runner:cuda${CUDA_VERSION}-python${PYTHON_VERSION}
|
|
98
|
-
ARG VOXBOX_VERSION=0.0.
|
|
98
|
+
ARG VOXBOX_VERSION=0.0.21
|
|
99
99
|
ARG VOXBOX_TORCH_VERSION=2.7.1
|
|
100
100
|
ARG VOXBOX_TORCH_CUDA_VERSION=${CUDA_VERSION}
|
|
101
101
|
ARG VLLM_BASE_IMAGE=gpustack/runner:cuda${CUDA_VERSION}-python${PYTHON_VERSION}
|
|
@@ -923,6 +923,28 @@
|
|
|
923
923
|
"docker_image": "gpustack/runner:cuda12.8-vllm0.10.0",
|
|
924
924
|
"deprecated": false
|
|
925
925
|
},
|
|
926
|
+
{
|
|
927
|
+
"backend": "cuda",
|
|
928
|
+
"backend_version": "12.8",
|
|
929
|
+
"original_backend_version": "12.8.1",
|
|
930
|
+
"backend_variant": "",
|
|
931
|
+
"service": "voxbox",
|
|
932
|
+
"service_version": "0.0.21",
|
|
933
|
+
"platform": "linux/amd64",
|
|
934
|
+
"docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
|
|
935
|
+
"deprecated": false
|
|
936
|
+
},
|
|
937
|
+
{
|
|
938
|
+
"backend": "cuda",
|
|
939
|
+
"backend_version": "12.8",
|
|
940
|
+
"original_backend_version": "12.8.1",
|
|
941
|
+
"backend_variant": "",
|
|
942
|
+
"service": "voxbox",
|
|
943
|
+
"service_version": "0.0.21",
|
|
944
|
+
"platform": "linux/arm64",
|
|
945
|
+
"docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
|
|
946
|
+
"deprecated": false
|
|
947
|
+
},
|
|
926
948
|
{
|
|
927
949
|
"backend": "cuda",
|
|
928
950
|
"backend_version": "12.8",
|
|
@@ -1099,6 +1121,28 @@
|
|
|
1099
1121
|
"docker_image": "gpustack/runner:cuda12.6-vllm0.10.0",
|
|
1100
1122
|
"deprecated": false
|
|
1101
1123
|
},
|
|
1124
|
+
{
|
|
1125
|
+
"backend": "cuda",
|
|
1126
|
+
"backend_version": "12.6",
|
|
1127
|
+
"original_backend_version": "12.6.3",
|
|
1128
|
+
"backend_variant": "",
|
|
1129
|
+
"service": "voxbox",
|
|
1130
|
+
"service_version": "0.0.21",
|
|
1131
|
+
"platform": "linux/amd64",
|
|
1132
|
+
"docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
|
|
1133
|
+
"deprecated": false
|
|
1134
|
+
},
|
|
1135
|
+
{
|
|
1136
|
+
"backend": "cuda",
|
|
1137
|
+
"backend_version": "12.6",
|
|
1138
|
+
"original_backend_version": "12.6.3",
|
|
1139
|
+
"backend_variant": "",
|
|
1140
|
+
"service": "voxbox",
|
|
1141
|
+
"service_version": "0.0.21",
|
|
1142
|
+
"platform": "linux/arm64",
|
|
1143
|
+
"docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
|
|
1144
|
+
"deprecated": false
|
|
1145
|
+
},
|
|
1102
1146
|
{
|
|
1103
1147
|
"backend": "cuda",
|
|
1104
1148
|
"backend_version": "12.6",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
git_commit = "c9f91c3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/docs/modules/gpustack_runner.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/__init__.py
RENAMED
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/__types__.py
RENAMED
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/images.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/test_runner.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_current_date_time.sh
RENAMED
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_get_temperature.sh
RENAMED
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_get_weather.sh
RENAMED
|
File without changes
|
{gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_square_of_number.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|