gpustack-runner 0.1.23.post4__tar.gz → 0.1.23.post5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/PKG-INFO +12 -4
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/README.md +11 -3
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.23.post5/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/cmds/images.py +19 -5
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/runner.py +1 -1
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/runner.py.json +33 -0
- gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +81 -0
- gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +93 -0
- gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +78 -0
- gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +98 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/README.md +1 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/Dockerfile +100 -15
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/corex/Dockerfile +10 -7
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cuda/Dockerfile +101 -7
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/dtk/Dockerfile +28 -16
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/expand_matrix.sh +1 -1
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/maca/Dockerfile +13 -10
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/matrix.yaml +24 -0
- gpustack_runner-0.1.23.post5/pack/musa/Dockerfile +395 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/rocm/Dockerfile +108 -6
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +41 -0
- gpustack_runner-0.1.23.post4/gpustack_runner/_version_appendix.py +0 -1
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.codespelldict +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.codespellrc +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.gitattributes +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.gitignore +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.python-version +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/LICENSE +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/Makefile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/docs/index.md +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/envs.py +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/hatch.toml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/patches/mindie.zip +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/prune_runner.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pyproject.toml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pytest.ini +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/ruff.toml +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/activate +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/uv.lock +0 -0
- {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.23.post4
|
|
3
|
+
Version: 0.1.23.post5
|
|
4
4
|
Summary: GPUStack Runner is a library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -97,9 +97,9 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
97
97
|
|
|
98
98
|
### Hygon DTK
|
|
99
99
|
|
|
100
|
-
| DTK Version <br/> (Variant) | vLLM |
|
|
101
|
-
|
|
102
|
-
| 25.04 | `0.9.2`, `0.8.5` |
|
|
100
|
+
| DTK Version <br/> (Variant) | vLLM |
|
|
101
|
+
|-----------------------------|----------------------------|
|
|
102
|
+
| 25.04 | `0.11.0`, `0.9.2`, `0.8.5` |
|
|
103
103
|
|
|
104
104
|
### MetaX MACA
|
|
105
105
|
|
|
@@ -108,6 +108,13 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
108
108
|
| 3.2 | `0.10.2` |
|
|
109
109
|
| 3.0 | `0.9.1` |
|
|
110
110
|
|
|
111
|
+
### MThreads MUSA
|
|
112
|
+
|
|
113
|
+
| MUSA Version <br/> (Variant) | vLLM | SGLang |
|
|
114
|
+
|------------------------------|---------|---------|
|
|
115
|
+
| 4.3.2 | | `0.5.2` |
|
|
116
|
+
| 4.1.0 | `0.9.2` | |
|
|
117
|
+
|
|
111
118
|
### AMD ROCm
|
|
112
119
|
|
|
113
120
|
> [!CAUTION]
|
|
@@ -171,6 +178,7 @@ ARG PYTHON_VERSION=... # REQUIRED
|
|
|
171
178
|
ARG CMAKE_MAX_JOBS=... # REQUIRED
|
|
172
179
|
ARG {OTHERS} # OPTIONAL
|
|
173
180
|
ARG {BACKEND}_VERSION=... # REQUIRED
|
|
181
|
+
ARG {BACKEND}_VERSION_EXTRA=... # OPTIONAL
|
|
174
182
|
ARG {BACKEND}_ARCHS=... # REQUIRED
|
|
175
183
|
ARG {BACKEND}_{OTHERS}=... # OPTIONAL
|
|
176
184
|
ARG {SERVICE}_BASE_IMAGE=... # REQUIRED
|
|
@@ -77,9 +77,9 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
77
77
|
|
|
78
78
|
### Hygon DTK
|
|
79
79
|
|
|
80
|
-
| DTK Version <br/> (Variant) | vLLM |
|
|
81
|
-
|
|
82
|
-
| 25.04 | `0.9.2`, `0.8.5` |
|
|
80
|
+
| DTK Version <br/> (Variant) | vLLM |
|
|
81
|
+
|-----------------------------|----------------------------|
|
|
82
|
+
| 25.04 | `0.11.0`, `0.9.2`, `0.8.5` |
|
|
83
83
|
|
|
84
84
|
### MetaX MACA
|
|
85
85
|
|
|
@@ -88,6 +88,13 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
88
88
|
| 3.2 | `0.10.2` |
|
|
89
89
|
| 3.0 | `0.9.1` |
|
|
90
90
|
|
|
91
|
+
### MThreads MUSA
|
|
92
|
+
|
|
93
|
+
| MUSA Version <br/> (Variant) | vLLM | SGLang |
|
|
94
|
+
|------------------------------|---------|---------|
|
|
95
|
+
| 4.3.2 | | `0.5.2` |
|
|
96
|
+
| 4.1.0 | `0.9.2` | |
|
|
97
|
+
|
|
91
98
|
### AMD ROCm
|
|
92
99
|
|
|
93
100
|
> [!CAUTION]
|
|
@@ -151,6 +158,7 @@ ARG PYTHON_VERSION=... # REQUIRED
|
|
|
151
158
|
ARG CMAKE_MAX_JOBS=... # REQUIRED
|
|
152
159
|
ARG {OTHERS} # OPTIONAL
|
|
153
160
|
ARG {BACKEND}_VERSION=... # REQUIRED
|
|
161
|
+
ARG {BACKEND}_VERSION_EXTRA=... # OPTIONAL
|
|
154
162
|
ARG {BACKEND}_ARCHS=... # REQUIRED
|
|
155
163
|
ARG {BACKEND}_{OTHERS}=... # OPTIONAL
|
|
156
164
|
ARG {SERVICE}_BASE_IMAGE=... # REQUIRED
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.23.post4'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 23, 'post4')
|
|
30
|
+
__version__ = version = '0.1.23.post5'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 23, 'post5')
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "d297d69"
|
{gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/cmds/images.py
RENAMED
|
@@ -444,14 +444,14 @@ class SaveImagesSubCommand(SubCommand):
|
|
|
444
444
|
|
|
445
445
|
command = [
|
|
446
446
|
"skopeo",
|
|
447
|
-
"--override-os",
|
|
448
|
-
override_os,
|
|
449
|
-
"--override-arch",
|
|
450
|
-
override_arch,
|
|
451
447
|
"copy",
|
|
452
448
|
"--src-tls-verify=false",
|
|
453
449
|
"--retry-times",
|
|
454
450
|
str(self.max_retries),
|
|
451
|
+
"--override-os",
|
|
452
|
+
override_os,
|
|
453
|
+
"--override-arch",
|
|
454
|
+
override_arch,
|
|
455
455
|
]
|
|
456
456
|
if self.source_username and self.source_password:
|
|
457
457
|
command.extend(
|
|
@@ -771,6 +771,10 @@ class CopyImagesSubCommand(SubCommand):
|
|
|
771
771
|
print(f"❌ Error syncing image '{img_name}'")
|
|
772
772
|
failures.append((img_name, img_err))
|
|
773
773
|
|
|
774
|
+
override_os, override_arch = None, None
|
|
775
|
+
if self.platform:
|
|
776
|
+
override_os, override_arch = self.platform.split("/", maxsplit=1)
|
|
777
|
+
|
|
774
778
|
# Submit tasks
|
|
775
779
|
for img in images:
|
|
776
780
|
command = [
|
|
@@ -778,10 +782,20 @@ class CopyImagesSubCommand(SubCommand):
|
|
|
778
782
|
"copy",
|
|
779
783
|
"--src-tls-verify=false",
|
|
780
784
|
"--dest-tls-verify=false",
|
|
781
|
-
"--all",
|
|
782
785
|
"--retry-times",
|
|
783
786
|
str(self.max_retries),
|
|
784
787
|
]
|
|
788
|
+
if override_os and override_arch:
|
|
789
|
+
command.extend(
|
|
790
|
+
[
|
|
791
|
+
"--override-os",
|
|
792
|
+
override_os,
|
|
793
|
+
"--override-arch",
|
|
794
|
+
override_arch,
|
|
795
|
+
],
|
|
796
|
+
)
|
|
797
|
+
else:
|
|
798
|
+
command.append("--all")
|
|
785
799
|
if self.source_username and self.source_password:
|
|
786
800
|
command.extend(
|
|
787
801
|
[
|
|
@@ -13,7 +13,7 @@ from dataclasses_json import dataclass_json
|
|
|
13
13
|
from . import envs
|
|
14
14
|
|
|
15
15
|
_RE_DOCKER_IMAGE = re.compile(
|
|
16
|
-
r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
|
|
16
|
+
r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|musa|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
|
|
17
17
|
)
|
|
18
18
|
"""
|
|
19
19
|
Regex for Docker image parsing,
|
{gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/runner.py.json
RENAMED
|
@@ -1363,6 +1363,17 @@
|
|
|
1363
1363
|
"docker_image": "gpustack/runner:cuda12.4-voxbox0.0.20",
|
|
1364
1364
|
"deprecated": true
|
|
1365
1365
|
},
|
|
1366
|
+
{
|
|
1367
|
+
"backend": "dtk",
|
|
1368
|
+
"backend_version": "25.04",
|
|
1369
|
+
"original_backend_version": "25.04.2",
|
|
1370
|
+
"backend_variant": "",
|
|
1371
|
+
"service": "vllm",
|
|
1372
|
+
"service_version": "0.11.0",
|
|
1373
|
+
"platform": "linux/amd64",
|
|
1374
|
+
"docker_image": "gpustack/runner:dtk25.04-vllm0.11.0",
|
|
1375
|
+
"deprecated": false
|
|
1376
|
+
},
|
|
1366
1377
|
{
|
|
1367
1378
|
"backend": "dtk",
|
|
1368
1379
|
"backend_version": "25.04",
|
|
@@ -1407,6 +1418,28 @@
|
|
|
1407
1418
|
"docker_image": "gpustack/runner:maca3.0-vllm0.9.1",
|
|
1408
1419
|
"deprecated": false
|
|
1409
1420
|
},
|
|
1421
|
+
{
|
|
1422
|
+
"backend": "musa",
|
|
1423
|
+
"backend_version": "4.3",
|
|
1424
|
+
"original_backend_version": "4.3.2",
|
|
1425
|
+
"backend_variant": "",
|
|
1426
|
+
"service": "sglang",
|
|
1427
|
+
"service_version": "0.5.7",
|
|
1428
|
+
"platform": "linux/amd64",
|
|
1429
|
+
"docker_image": "gpustack/runner:musa4.3-sglang0.5.7",
|
|
1430
|
+
"deprecated": false
|
|
1431
|
+
},
|
|
1432
|
+
{
|
|
1433
|
+
"backend": "musa",
|
|
1434
|
+
"backend_version": "4.1",
|
|
1435
|
+
"original_backend_version": "4.1.0",
|
|
1436
|
+
"backend_variant": "",
|
|
1437
|
+
"service": "vllm",
|
|
1438
|
+
"service_version": "0.9.2",
|
|
1439
|
+
"platform": "linux/amd64",
|
|
1440
|
+
"docker_image": "gpustack/runner:musa4.1-vllm0.9.2",
|
|
1441
|
+
"deprecated": false
|
|
1442
|
+
},
|
|
1410
1443
|
{
|
|
1411
1444
|
"backend": "rocm",
|
|
1412
1445
|
"backend_version": "7.0",
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG CANN_VERSION=8.3
|
|
3
|
+
ARG CANN_ARCHS=910b
|
|
4
|
+
ARG VLLM_VERSION=0.12.0
|
|
5
|
+
ARG VLLM_OMNI_COMMIT=75cdf1c
|
|
6
|
+
|
|
7
|
+
FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-vllm${VLLM_VERSION} AS vllm-build-omni
|
|
8
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
9
|
+
|
|
10
|
+
ARG TARGETPLATFORM
|
|
11
|
+
ARG TARGETOS
|
|
12
|
+
ARG TARGETARCH
|
|
13
|
+
|
|
14
|
+
## Build Omni
|
|
15
|
+
|
|
16
|
+
ARG CMAKE_MAX_JOBS
|
|
17
|
+
ARG VLLM_OMNI_COMMIT
|
|
18
|
+
|
|
19
|
+
ENV VLLM_OMNI_COMMIT=${VLLM_OMNI_COMMIT}
|
|
20
|
+
|
|
21
|
+
RUN <<EOF
|
|
22
|
+
# Omni
|
|
23
|
+
|
|
24
|
+
CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
25
|
+
if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
|
|
26
|
+
CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
|
|
27
|
+
fi
|
|
28
|
+
if (( $(echo "${CMAKE_MAX_JOBS} > 4" | bc -l) )); then
|
|
29
|
+
CMAKE_MAX_JOBS="4"
|
|
30
|
+
fi
|
|
31
|
+
export MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
32
|
+
export COMPILE_CUSTOM_KERNELS=1
|
|
33
|
+
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CANN_HOME}/ascend-toolkit/latest/$(uname -i)-linux/devlib"
|
|
34
|
+
export VLLM_TARGET_DEVICE="empty"
|
|
35
|
+
echo "Building vLLM Omni with the following environment variables:"
|
|
36
|
+
env
|
|
37
|
+
|
|
38
|
+
# Build
|
|
39
|
+
git -C /tmp clone --recursive --shallow-submodules \
|
|
40
|
+
https://github.com/vllm-project/vllm-omni vllm_omni \
|
|
41
|
+
&& pushd /tmp/vllm_omni \
|
|
42
|
+
&& git checkout ${VLLM_OMNI_COMMIT} \
|
|
43
|
+
&& git submodule update --init --recursive
|
|
44
|
+
pushd /tmp/vllm_omni \
|
|
45
|
+
&& python -v -m build --no-isolation --wheel \
|
|
46
|
+
&& tree -hs /tmp/vllm_omni/dist \
|
|
47
|
+
&& mv /tmp/vllm_omni/dist /workspace
|
|
48
|
+
|
|
49
|
+
# Cleanup
|
|
50
|
+
rm -rf /var/tmp/* \
|
|
51
|
+
&& rm -rf /tmp/*
|
|
52
|
+
EOF
|
|
53
|
+
|
|
54
|
+
FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-vllm${VLLM_VERSION} AS vllm
|
|
55
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
56
|
+
|
|
57
|
+
ARG TARGETPLATFORM
|
|
58
|
+
ARG TARGETOS
|
|
59
|
+
ARG TARGETARCH
|
|
60
|
+
|
|
61
|
+
## Install Omni
|
|
62
|
+
|
|
63
|
+
RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
|
|
64
|
+
# Omni
|
|
65
|
+
|
|
66
|
+
# Install
|
|
67
|
+
uv pip install --no-build-isolation \
|
|
68
|
+
/omni/workspace/*.whl
|
|
69
|
+
|
|
70
|
+
# Review
|
|
71
|
+
uv pip tree
|
|
72
|
+
|
|
73
|
+
# Cleanup
|
|
74
|
+
rm -rf /var/tmp/* \
|
|
75
|
+
&& rm -rf /tmp/*
|
|
76
|
+
EOF
|
|
77
|
+
|
|
78
|
+
## Entrypoint
|
|
79
|
+
|
|
80
|
+
WORKDIR /
|
|
81
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG CUDA_VERSION=12.8
|
|
3
|
+
ARG VLLM_VERSION=0.12.0
|
|
4
|
+
ARG VLLM_OMNI_COMMIT=75cdf1c
|
|
5
|
+
|
|
6
|
+
FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm-build-omni
|
|
7
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
8
|
+
|
|
9
|
+
ARG TARGETPLATFORM
|
|
10
|
+
ARG TARGETOS
|
|
11
|
+
ARG TARGETARCH
|
|
12
|
+
|
|
13
|
+
ARG TARGETPLATFORM
|
|
14
|
+
ARG TARGETOS
|
|
15
|
+
ARG TARGETARCH
|
|
16
|
+
|
|
17
|
+
## Build Omni
|
|
18
|
+
|
|
19
|
+
ARG CMAKE_MAX_JOBS
|
|
20
|
+
ARG VLLM_OMNI_COMMIT
|
|
21
|
+
|
|
22
|
+
ENV VLLM_OMNI_COMMIT=${VLLM_OMNI_COMMIT}
|
|
23
|
+
|
|
24
|
+
RUN <<EOF
|
|
25
|
+
# Omni
|
|
26
|
+
|
|
27
|
+
IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
|
|
28
|
+
|
|
29
|
+
CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
30
|
+
if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
|
|
31
|
+
CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
|
|
32
|
+
fi
|
|
33
|
+
if (( $(echo "${CMAKE_MAX_JOBS} > 4" | bc -l) )); then
|
|
34
|
+
CMAKE_MAX_JOBS="4"
|
|
35
|
+
fi
|
|
36
|
+
VL_CUDA_ARCHS="${CUDA_ARCHS}"
|
|
37
|
+
if [[ -z "${VL_CUDA_ARCHS}" ]]; then
|
|
38
|
+
if (( $(echo "${CUDA_MAJOR}.${CUDA_MINOR} < 12.9" | bc -l) )); then
|
|
39
|
+
VL_CUDA_ARCHS="7.5 8.0+PTX 8.9 9.0 10.0+PTX 12.0+PTX"
|
|
40
|
+
else
|
|
41
|
+
VL_CUDA_ARCHS="7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX"
|
|
42
|
+
fi
|
|
43
|
+
fi
|
|
44
|
+
export MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
45
|
+
export TORCH_CUDA_ARCH_LIST="${VL_CUDA_ARCHS}"
|
|
46
|
+
export NVCC_THREADS=1
|
|
47
|
+
echo "Building vLLM Omni with the following environment variables:"
|
|
48
|
+
env
|
|
49
|
+
|
|
50
|
+
# Build
|
|
51
|
+
git -C /tmp clone --recursive --shallow-submodules \
|
|
52
|
+
https://github.com/vllm-project/vllm-omni vllm_omni \
|
|
53
|
+
&& pushd /tmp/vllm_omni \
|
|
54
|
+
&& git checkout ${VLLM_OMNI_COMMIT} \
|
|
55
|
+
&& git submodule update --init --recursive
|
|
56
|
+
pushd /tmp/vllm_omni \
|
|
57
|
+
&& python -v -m build --no-isolation --wheel \
|
|
58
|
+
&& tree -hs /tmp/vllm_omni/dist \
|
|
59
|
+
&& mv /tmp/vllm_omni/dist /workspace
|
|
60
|
+
|
|
61
|
+
# Cleanup
|
|
62
|
+
rm -rf /var/tmp/* \
|
|
63
|
+
&& rm -rf /tmp/*
|
|
64
|
+
EOF
|
|
65
|
+
|
|
66
|
+
FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
|
|
67
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
68
|
+
|
|
69
|
+
ARG TARGETPLATFORM
|
|
70
|
+
ARG TARGETOS
|
|
71
|
+
ARG TARGETARCH
|
|
72
|
+
|
|
73
|
+
## Install Omni
|
|
74
|
+
|
|
75
|
+
RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
|
|
76
|
+
# Omni
|
|
77
|
+
|
|
78
|
+
# Install
|
|
79
|
+
uv pip install --no-build-isolation \
|
|
80
|
+
/omni/workspace/*.whl
|
|
81
|
+
|
|
82
|
+
# Review
|
|
83
|
+
uv pip tree
|
|
84
|
+
|
|
85
|
+
# Cleanup
|
|
86
|
+
rm -rf /var/tmp/* \
|
|
87
|
+
&& rm -rf /tmp/*
|
|
88
|
+
EOF
|
|
89
|
+
|
|
90
|
+
## Entrypoint
|
|
91
|
+
|
|
92
|
+
WORKDIR /
|
|
93
|
+
ENTRYPOINT [ "tini", "--" ]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
rules:
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# NVIDIA CUDA
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
## Packed NVIDIA CUDA 12.9.
|
|
8
|
+
##
|
|
9
|
+
- backend: "cuda"
|
|
10
|
+
services:
|
|
11
|
+
- "vllm"
|
|
12
|
+
args:
|
|
13
|
+
- "CUDA_VERSION=12.9"
|
|
14
|
+
- "VLLM_VERSION=0.12.0"
|
|
15
|
+
## Packed NVIDIA CUDA 12.8.
|
|
16
|
+
##
|
|
17
|
+
- backend: "cuda"
|
|
18
|
+
services:
|
|
19
|
+
- "vllm"
|
|
20
|
+
args:
|
|
21
|
+
- "CUDA_VERSION=12.8"
|
|
22
|
+
- "VLLM_VERSION=0.12.0"
|
|
23
|
+
## Packed NVIDIA CUDA 12.6.
|
|
24
|
+
##
|
|
25
|
+
- backend: "cuda"
|
|
26
|
+
services:
|
|
27
|
+
- "vllm"
|
|
28
|
+
args:
|
|
29
|
+
- "CUDA_VERSION=12.6"
|
|
30
|
+
- "VLLM_VERSION=0.12.0"
|
|
31
|
+
|
|
32
|
+
#
|
|
33
|
+
# AMD ROCm
|
|
34
|
+
#
|
|
35
|
+
|
|
36
|
+
## Packed AMD ROCm 7.0.
|
|
37
|
+
##
|
|
38
|
+
- backend: "rocm"
|
|
39
|
+
services:
|
|
40
|
+
- "vllm"
|
|
41
|
+
platforms:
|
|
42
|
+
- "linux/amd64"
|
|
43
|
+
args:
|
|
44
|
+
- "ROCM_VERSION=7.0"
|
|
45
|
+
- "VLLM_VERSION=0.12.0"
|
|
46
|
+
## Packed AMD ROCm 6.4.
|
|
47
|
+
##
|
|
48
|
+
- backend: "rocm"
|
|
49
|
+
services:
|
|
50
|
+
- "vllm"
|
|
51
|
+
platforms:
|
|
52
|
+
- "linux/amd64"
|
|
53
|
+
args:
|
|
54
|
+
- "ROCM_VERSION=6.4"
|
|
55
|
+
- "VLLM_VERSION=0.12.0"
|
|
56
|
+
|
|
57
|
+
#
|
|
58
|
+
# Ascend CANN
|
|
59
|
+
#
|
|
60
|
+
|
|
61
|
+
## Packed Ascend CANN 8.3, using CANN Kernel for A3.
|
|
62
|
+
##
|
|
63
|
+
- backend: "cann"
|
|
64
|
+
services:
|
|
65
|
+
- "vllm"
|
|
66
|
+
args:
|
|
67
|
+
- "CANN_VERSION=8.3"
|
|
68
|
+
- "CANN_ARCHS=a3"
|
|
69
|
+
- "VLLM_VERSION=0.12.0"
|
|
70
|
+
## Packed Ascend CANN 8.3, using CANN Kernel for 910B.
|
|
71
|
+
##
|
|
72
|
+
- backend: "cann"
|
|
73
|
+
services:
|
|
74
|
+
- "vllm"
|
|
75
|
+
args:
|
|
76
|
+
- "CANN_VERSION=8.3"
|
|
77
|
+
- "CANN_ARCHS=910b"
|
|
78
|
+
- "VLLM_VERSION=0.12.0"
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
ARG CMAKE_MAX_JOBS
|
|
2
|
+
ARG ROCM_VERSION=6.4
|
|
3
|
+
ARG VLLM_VERSION=0.12.0
|
|
4
|
+
ARG VLLM_OMNI_COMMIT=75cdf1c
|
|
5
|
+
|
|
6
|
+
FROM gpustack/runner:rocm${ROCM_VERSION}-vllm${VLLM_VERSION} AS vllm-build-omni
|
|
7
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
8
|
+
|
|
9
|
+
ARG TARGETPLATFORM
|
|
10
|
+
ARG TARGETOS
|
|
11
|
+
ARG TARGETARCH
|
|
12
|
+
|
|
13
|
+
## Build Omni
|
|
14
|
+
|
|
15
|
+
ARG CMAKE_MAX_JOBS
|
|
16
|
+
ARG VLLM_OMNI_COMMIT
|
|
17
|
+
|
|
18
|
+
ENV VLLM_OMNI_COMMIT=${VLLM_OMNI_COMMIT}
|
|
19
|
+
|
|
20
|
+
RUN <<EOF
|
|
21
|
+
# Omni
|
|
22
|
+
|
|
23
|
+
IFS="." read -r ROCM_MAJOR ROCM_MINOR ROCM_PATCH <<< "${VLLM_TORCH_ROCM_VERSION}"
|
|
24
|
+
IFS="." read -r VL_MAJOR VL_MINOR VL_PATCH <<< "${VLLM_VERSION}"
|
|
25
|
+
|
|
26
|
+
CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
27
|
+
if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
|
|
28
|
+
CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
|
|
29
|
+
fi
|
|
30
|
+
if (( $(echo "${CMAKE_MAX_JOBS} > 4" | bc -l) )); then
|
|
31
|
+
CMAKE_MAX_JOBS="4"
|
|
32
|
+
fi
|
|
33
|
+
VL_ROCM_ARCHS="${ROCM_ARCHS}"
|
|
34
|
+
if [[ -z "${VL_ROCM_ARCHS}" ]]; then
|
|
35
|
+
if (( $(echo "${ROCM_MAJOR}.${ROCM_MINOR} < 7.0" | bc -l) )); then
|
|
36
|
+
VL_ROCM_ARCHS="gfx908;gfx90a;gfx942;gfx1030;gfx1100"
|
|
37
|
+
if (( $(echo "${VL_MAJOR}.${VL_MINOR} == 0.13" | bc -l) )); then
|
|
38
|
+
# TODO(thxCode): Temporarily remove gfx1030 for vLLM ROCm build due to build error in ROCm 6.4.4.
|
|
39
|
+
# #15 134.9 /tmp/vllm/build/temp.linux-x86_64-cpython-312/csrc/sampler.hip:564:63: error: local memory (66032) exceeds limit (65536) in 'void vllm::topKPerRowDecode<1024, true, false, true>(float const*, int const*, int*, int, int, int, int, float*, int, int const*)'
|
|
40
|
+
# ##15 134.9 564 | static __global__ __launch_bounds__(kNumThreadsPerBlock) void topKPerRowDecode(
|
|
41
|
+
# ##15 134.9 | ^
|
|
42
|
+
# ##15 134.9 16 warnings and 1 error generated when compiling for gfx1030.
|
|
43
|
+
VL_ROCM_ARCHS="gfx908;gfx90a;gfx942"
|
|
44
|
+
fi
|
|
45
|
+
else
|
|
46
|
+
VL_ROCM_ARCHS="gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151"
|
|
47
|
+
fi
|
|
48
|
+
fi
|
|
49
|
+
export MAX_JOBS="${CMAKE_MAX_JOBS}"
|
|
50
|
+
export COMPILE_CUSTOM_KERNELS=1
|
|
51
|
+
export PYTORCH_ROCM_ARCH="${VL_ROCM_ARCHS}"
|
|
52
|
+
echo "Building vLLM Omni with the following environment variables:"
|
|
53
|
+
env
|
|
54
|
+
|
|
55
|
+
# Build
|
|
56
|
+
git -C /tmp clone --recursive --shallow-submodules \
|
|
57
|
+
https://github.com/vllm-project/vllm-omni vllm_omni \
|
|
58
|
+
&& pushd /tmp/vllm_omni \
|
|
59
|
+
&& git checkout ${VLLM_OMNI_COMMIT} \
|
|
60
|
+
&& git submodule update --init --recursive
|
|
61
|
+
pushd /tmp/vllm_omni \
|
|
62
|
+
&& python -v -m build --no-isolation --wheel \
|
|
63
|
+
&& tree -hs /tmp/vllm_omni/dist \
|
|
64
|
+
&& mv /tmp/vllm_omni/dist /workspace
|
|
65
|
+
|
|
66
|
+
# Cleanup
|
|
67
|
+
rm -rf /var/tmp/* \
|
|
68
|
+
&& rm -rf /tmp/*
|
|
69
|
+
EOF
|
|
70
|
+
|
|
71
|
+
FROM gpustack/runner:rocm${ROCM_VERSION}-vllm${VLLM_VERSION} AS vllm
|
|
72
|
+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
|
|
73
|
+
|
|
74
|
+
ARG TARGETPLATFORM
|
|
75
|
+
ARG TARGETOS
|
|
76
|
+
ARG TARGETARCH
|
|
77
|
+
|
|
78
|
+
## Install Omni
|
|
79
|
+
|
|
80
|
+
RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
|
|
81
|
+
# Omni
|
|
82
|
+
|
|
83
|
+
# Install
|
|
84
|
+
uv pip install --no-build-isolation \
|
|
85
|
+
/omni/workspace/*.whl
|
|
86
|
+
|
|
87
|
+
# Review
|
|
88
|
+
uv pip tree
|
|
89
|
+
|
|
90
|
+
# Cleanup
|
|
91
|
+
rm -rf /var/tmp/* \
|
|
92
|
+
&& rm -rf /tmp/*
|
|
93
|
+
EOF
|
|
94
|
+
|
|
95
|
+
## Entrypoint
|
|
96
|
+
|
|
97
|
+
WORKDIR /
|
|
98
|
+
ENTRYPOINT [ "tini", "--" ]
|
{gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/README.md
RENAMED
|
@@ -32,3 +32,4 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
|
|
|
32
32
|
- [x] 2025-12-19: Install `vLLM[audio]` packages for vLLM 0.12.0/0.11.2 of CUDA/ROCm released images.
|
|
33
33
|
- [x] 2025-12-19: Install `petit-kernel` package for vLLM 0.12.0/0.11.2 and SGLang 0.5.6.post2/0.5.5.post3 of ROCm released images.
|
|
34
34
|
- [x] 2025-12-24: Apply ATB config patches to MindIE 2.2.rc1 for CANN released images.
|
|
35
|
+
- [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.
|