gpustack-runner 0.1.24__tar.gz → 0.1.24.post1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/PKG-INFO +20 -38
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/README.md +19 -37
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.24.post1/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/cmds/images.py +0 -9
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/runner.py +0 -6
- gpustack_runner-0.1.24/gpustack_runner/_version_appendix.py +0 -1
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/.codespelldict +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/.codespellrc +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/.gitattributes +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/.gitignore +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/.python-version +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/LICENSE +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/Makefile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/docs/index.md +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/__utils__.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/envs.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/gpustack_runner/runner.py.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/hatch.toml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/.post_operation/README.md +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/cann/patches/mindie.zip +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/corex/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/dtk/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/expand_matrix.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/hggc/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/maca/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/matrix.yaml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/musa/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/prune_runner.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pyproject.toml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/pytest.ini +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/ruff.toml +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_merge_image.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_replace_image_with.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/test_split_image.json +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/test_utils.py +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/activate +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/uv.lock +0 -0
- {gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.24
|
|
3
|
+
Version: 0.1.24.post1
|
|
4
4
|
Summary: GPUStack Runner is a library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -46,24 +46,20 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
46
46
|
|
|
47
47
|
### Ascend CANN
|
|
48
48
|
|
|
49
|
-
> [!CAUTION]
|
|
50
|
-
> Since v0.1.23:
|
|
51
|
-
> - Deprecated MindIE `2.1.rc1`.
|
|
52
|
-
|
|
53
49
|
> [!WARNING]
|
|
54
50
|
> - The Atlas 300I series is currently experimental in vLLM, only supporting eager mode and float16 data type. And there
|
|
55
51
|
are some known issues for running vLLM; refer to
|
|
56
52
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
57
53
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
58
54
|
|
|
59
|
-
| CANN Version <br/> (Variant) | MindIE
|
|
60
|
-
|
|
61
|
-
| 8.3 (A3/910C) | `2.2.rc1`
|
|
62
|
-
| 8.3 (910B) | `2.2.rc1`
|
|
63
|
-
| 8.3 (310P) | `2.2.rc1`
|
|
64
|
-
| 8.2 (A3/910C) | `2.1.rc2`
|
|
65
|
-
| 8.2 (910B) | `2.1.rc2
|
|
66
|
-
| 8.2 (310P) | `2.1.rc2
|
|
55
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
56
|
+
|------------------------------|-----------|------------------------------------------------------------|------------------------|
|
|
57
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
58
|
+
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
59
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
60
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
61
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
62
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
67
63
|
|
|
68
64
|
### Iluvatar CoreX
|
|
69
65
|
|
|
@@ -73,13 +69,6 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
73
69
|
|
|
74
70
|
### NVIDIA CUDA
|
|
75
71
|
|
|
76
|
-
> [!CAUTION]
|
|
77
|
-
> Since v0.1.23:
|
|
78
|
-
> - Deprecated all services for CUDA 12.4.
|
|
79
|
-
> - Deprecated vLLM `0.11.0`, `0.10.1.1`, `0.10.0`.
|
|
80
|
-
> - Deprecated SGLang `0.5.5`.
|
|
81
|
-
> - Deprecated VoxBox `0.0.20`.
|
|
82
|
-
|
|
83
72
|
> [!NOTE]
|
|
84
73
|
> - CUDA 12.9 supports Compute Capabilities:
|
|
85
74
|
`7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX`.
|
|
@@ -88,12 +77,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
88
77
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
89
78
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
90
79
|
|
|
91
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
92
|
-
|
|
93
|
-
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2`
|
|
94
|
-
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`,
|
|
95
|
-
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2
|
|
96
|
-
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
80
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
81
|
+
|------------------------------|---------------------------------------------|-----------------------------------------------------------|----------|
|
|
82
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` | |
|
|
83
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3`, `0.5.4.post3` | `0.0.21` |
|
|
84
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
|
|
97
85
|
|
|
98
86
|
### Hygon DTK
|
|
99
87
|
|
|
@@ -123,29 +111,23 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
123
111
|
|
|
124
112
|
### AMD ROCm
|
|
125
113
|
|
|
126
|
-
> [!CAUTION]
|
|
127
|
-
> Since v0.1.23:
|
|
128
|
-
> - Deprecated all services for ROCm 6.3.
|
|
129
|
-
> - Deprecated vLLM `0.11.0`.
|
|
130
|
-
|
|
131
114
|
> [!NOTE]
|
|
132
115
|
> - ROCm 7.0 supports LLVM targets:
|
|
133
116
|
`gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1200 gfx1201 gfx1150 gfx1151`.
|
|
134
|
-
> - ROCm 6.4
|
|
117
|
+
> - ROCm 6.4 supports LLVM targets:
|
|
135
118
|
`gfx908 gfx90a gfx942 gfx1030 gfx1100`.
|
|
136
119
|
|
|
137
120
|
> [!WARNING]
|
|
138
121
|
> - ROCm 7.0 vLLM `0.11.2/0.11.0` are reusing the official ROCm 6.4 PyTorch 2.9 wheel package rather than a ROCm
|
|
139
|
-
7.0 specific PyTorch build. Although supports ROCm 7.0 in vLLM `0.11.2
|
|
122
|
+
7.0 specific PyTorch build. Although vLLM `0.11.2` supports ROCm 7.0, `gfx1150/gfx1151` are not supported yet.
|
|
140
123
|
> - ROCm 6.4 vLLM `0.13.0` supports `gfx908 gfx90a gfx942` only.
|
|
141
124
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
142
125
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
143
126
|
|
|
144
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
145
|
-
|
|
146
|
-
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2
|
|
147
|
-
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2`
|
|
148
|
-
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
127
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
128
|
+
|------------------------------|---------------------------------------------|--------------------------------------------|
|
|
129
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
|
|
130
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
|
|
149
131
|
|
|
150
132
|
## Directory Structure
|
|
151
133
|
|
|
@@ -26,24 +26,20 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
26
26
|
|
|
27
27
|
### Ascend CANN
|
|
28
28
|
|
|
29
|
-
> [!CAUTION]
|
|
30
|
-
> Since v0.1.23:
|
|
31
|
-
> - Deprecated MindIE `2.1.rc1`.
|
|
32
|
-
|
|
33
29
|
> [!WARNING]
|
|
34
30
|
> - The Atlas 300I series is currently experimental in vLLM, only supporting eager mode and float16 data type. And there
|
|
35
31
|
are some known issues for running vLLM; refer to
|
|
36
32
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
37
33
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
38
34
|
|
|
39
|
-
| CANN Version <br/> (Variant) | MindIE
|
|
40
|
-
|
|
41
|
-
| 8.3 (A3/910C) | `2.2.rc1`
|
|
42
|
-
| 8.3 (910B) | `2.2.rc1`
|
|
43
|
-
| 8.3 (310P) | `2.2.rc1`
|
|
44
|
-
| 8.2 (A3/910C) | `2.1.rc2`
|
|
45
|
-
| 8.2 (910B) | `2.1.rc2
|
|
46
|
-
| 8.2 (310P) | `2.1.rc2
|
|
35
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
36
|
+
|------------------------------|-----------|------------------------------------------------------------|------------------------|
|
|
37
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
38
|
+
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
39
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
40
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
41
|
+
| 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
42
|
+
| 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
|
|
47
43
|
|
|
48
44
|
### Iluvatar CoreX
|
|
49
45
|
|
|
@@ -53,13 +49,6 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
53
49
|
|
|
54
50
|
### NVIDIA CUDA
|
|
55
51
|
|
|
56
|
-
> [!CAUTION]
|
|
57
|
-
> Since v0.1.23:
|
|
58
|
-
> - Deprecated all services for CUDA 12.4.
|
|
59
|
-
> - Deprecated vLLM `0.11.0`, `0.10.1.1`, `0.10.0`.
|
|
60
|
-
> - Deprecated SGLang `0.5.5`.
|
|
61
|
-
> - Deprecated VoxBox `0.0.20`.
|
|
62
|
-
|
|
63
52
|
> [!NOTE]
|
|
64
53
|
> - CUDA 12.9 supports Compute Capabilities:
|
|
65
54
|
`7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX`.
|
|
@@ -68,12 +57,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
68
57
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
69
58
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
70
59
|
|
|
71
|
-
| CUDA Version <br/> (Variant) | vLLM
|
|
72
|
-
|
|
73
|
-
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2`
|
|
74
|
-
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`,
|
|
75
|
-
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2
|
|
76
|
-
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
60
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
61
|
+
|------------------------------|---------------------------------------------|-----------------------------------------------------------|----------|
|
|
62
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` | |
|
|
63
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3`, `0.5.4.post3` | `0.0.21` |
|
|
64
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
|
|
77
65
|
|
|
78
66
|
### Hygon DTK
|
|
79
67
|
|
|
@@ -103,29 +91,23 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
103
91
|
|
|
104
92
|
### AMD ROCm
|
|
105
93
|
|
|
106
|
-
> [!CAUTION]
|
|
107
|
-
> Since v0.1.23:
|
|
108
|
-
> - Deprecated all services for ROCm 6.3.
|
|
109
|
-
> - Deprecated vLLM `0.11.0`.
|
|
110
|
-
|
|
111
94
|
> [!NOTE]
|
|
112
95
|
> - ROCm 7.0 supports LLVM targets:
|
|
113
96
|
`gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1200 gfx1201 gfx1150 gfx1151`.
|
|
114
|
-
> - ROCm 6.4
|
|
97
|
+
> - ROCm 6.4 supports LLVM targets:
|
|
115
98
|
`gfx908 gfx90a gfx942 gfx1030 gfx1100`.
|
|
116
99
|
|
|
117
100
|
> [!WARNING]
|
|
118
101
|
> - ROCm 7.0 vLLM `0.11.2/0.11.0` are reusing the official ROCm 6.4 PyTorch 2.9 wheel package rather than a ROCm
|
|
119
|
-
7.0 specific PyTorch build. Although supports ROCm 7.0 in vLLM `0.11.2
|
|
102
|
+
7.0 specific PyTorch build. Although vLLM `0.11.2` supports ROCm 7.0, `gfx1150/gfx1151` are not supported yet.
|
|
120
103
|
> - ROCm 6.4 vLLM `0.13.0` supports `gfx908 gfx90a gfx942` only.
|
|
121
104
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
122
105
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
123
106
|
|
|
124
|
-
| ROCm Version <br/> (Variant) | vLLM
|
|
125
|
-
|
|
126
|
-
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2
|
|
127
|
-
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2`
|
|
128
|
-
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
107
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
108
|
+
|------------------------------|---------------------------------------------|--------------------------------------------|
|
|
109
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
|
|
110
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
|
|
129
111
|
|
|
130
112
|
## Directory Structure
|
|
131
113
|
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.24'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 24)
|
|
30
|
+
__version__ = version = '0.1.24.post1'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 24, 'post1')
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "ed41ee9"
|
|
@@ -1348,10 +1348,6 @@ def list_images(**kwargs) -> list[PlatformedImage]:
|
|
|
1348
1348
|
A list of platformed images.
|
|
1349
1349
|
|
|
1350
1350
|
"""
|
|
1351
|
-
# Reset to default for listing images,
|
|
1352
|
-
# in case the env is set to other value.
|
|
1353
|
-
envs.GPUSTACK_RUNNER_DEFAULT_CONTAINER_NAMESPACE = None
|
|
1354
|
-
|
|
1355
1351
|
platform = kwargs.pop("platform", None)
|
|
1356
1352
|
repository = kwargs.pop("repository", None)
|
|
1357
1353
|
|
|
@@ -1497,8 +1493,3 @@ def _execute_command(
|
|
|
1497
1493
|
args=command,
|
|
1498
1494
|
returncode=returncode,
|
|
1499
1495
|
)
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
append_images(
|
|
1503
|
-
"gpustack/runtime:pause",
|
|
1504
|
-
)
|
|
@@ -10,8 +10,6 @@ from typing import Any
|
|
|
10
10
|
|
|
11
11
|
from dataclasses_json import dataclass_json
|
|
12
12
|
|
|
13
|
-
from . import envs
|
|
14
|
-
|
|
15
13
|
_RE_DOCKER_IMAGE = re.compile(
|
|
16
14
|
r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|hggc|maca|musa|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
|
|
17
15
|
)
|
|
@@ -239,10 +237,6 @@ def list_runners(**kwargs) -> Runners | list[dict]:
|
|
|
239
237
|
json_list = json.load(f)
|
|
240
238
|
runners = []
|
|
241
239
|
for item in json_list:
|
|
242
|
-
if namespace := envs.GPUSTACK_RUNNER_DEFAULT_CONTAINER_NAMESPACE:
|
|
243
|
-
docker_image = item["docker_image"]
|
|
244
|
-
docker_image = docker_image.replace("gpustack/", f"{namespace}/")
|
|
245
|
-
item["docker_image"] = docker_image
|
|
246
240
|
runners.append(Runner.from_dict(item))
|
|
247
241
|
|
|
248
242
|
todict = kwargs.pop("todict", False)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
git_commit = "c2b7172"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/fixtures/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tests/gpustack_runner/test_runner.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_current_date_time.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.24 → gpustack_runner-0.1.24.post1}/tools/chat_tool_square_root_of_number.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|