gpustack-runner 0.1.22.post5__tar.gz → 0.1.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/.gitignore +3 -0
  2. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/PKG-INFO +39 -36
  3. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/README.md +38 -35
  4. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/_version.py +2 -2
  5. gpustack_runner-0.1.23/gpustack_runner/_version_appendix.py +1 -0
  6. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/cmds/images.py +4 -0
  7. gpustack_runner-0.1.23/gpustack_runner/envs.py +112 -0
  8. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/runner.py +14 -6
  9. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/runner.py.json +79 -35
  10. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/cann/Dockerfile +2 -1
  11. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/test_docker_image.json +5 -5
  12. gpustack_runner-0.1.23/tests/gpustack_runner/fixtures/test_list_backend_runners.json +51 -0
  13. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +79 -35
  14. gpustack_runner-0.1.23/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +68 -0
  15. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/test_list_service_runners.json +69 -54
  16. gpustack_runner-0.1.22.post5/gpustack_runner/_version_appendix.py +0 -1
  17. gpustack_runner-0.1.22.post5/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -145
  18. gpustack_runner-0.1.22.post5/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -68
  19. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/.codespelldict +0 -0
  20. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/.codespellrc +0 -0
  21. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/.gitattributes +0 -0
  22. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/.pre-commit-config.yaml +0 -0
  23. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/.python-version +0 -0
  24. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/LICENSE +0 -0
  25. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/Makefile +0 -0
  26. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/docs/index.md +0 -0
  27. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/docs/modules/gpustack_runner.md +0 -0
  28. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/__init__.py +0 -0
  29. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/__main__.py +0 -0
  30. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/_version.pyi +0 -0
  31. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/cmds/__init__.py +0 -0
  32. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/gpustack_runner/cmds/__types__.py +0 -0
  33. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/hatch.toml +0 -0
  34. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/mkdocs.yml +0 -0
  35. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
  36. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
  37. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
  38. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
  39. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
  40. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
  41. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
  42. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
  43. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
  44. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
  45. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
  46. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
  47. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
  48. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  49. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
  50. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
  51. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
  52. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
  53. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
  54. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
  55. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
  56. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
  57. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
  58. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  59. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
  60. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
  61. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
  62. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
  63. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
  64. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
  65. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
  66. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
  67. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
  68. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
  69. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
  70. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
  71. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
  72. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
  73. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
  74. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
  75. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
  76. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
  77. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
  78. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
  79. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
  80. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
  81. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
  82. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
  83. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
  84. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
  85. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
  86. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
  87. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
  88. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
  89. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
  90. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/.post_operation/README.md +0 -0
  91. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  92. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  93. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/cann/patches/mindie.zip +0 -0
  94. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/corex/Dockerfile +0 -0
  95. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/cuda/Dockerfile +0 -0
  96. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/discard_runner.sh +0 -0
  97. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/dtk/Dockerfile +0 -0
  98. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/expand_matrix.sh +0 -0
  99. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/maca/Dockerfile +0 -0
  100. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/matrix.yaml +0 -0
  101. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/merge_runner.sh +0 -0
  102. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/prune_runner.sh +0 -0
  103. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/rocm/Dockerfile +0 -0
  104. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
  105. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pyproject.toml +0 -0
  106. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/pytest.ini +0 -0
  107. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/ruff.toml +0 -0
  108. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/__init__.py +0 -0
  109. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tests/gpustack_runner/test_runner.py +0 -0
  110. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/activate +0 -0
  111. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/chat.sh +0 -0
  112. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/chat_tool_current_date_time.sh +0 -0
  113. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/chat_tool_get_temperature.sh +0 -0
  114. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/chat_tool_get_weather.sh +0 -0
  115. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/chat_tool_square_of_number.sh +0 -0
  116. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/chat_tool_square_root_of_number.sh +0 -0
  117. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/chat_tool_where_am_i.sh +0 -0
  118. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/run_runner.sh +0 -0
  119. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/tools/run_runner_cluster.sh +0 -0
  120. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/uv.lock +0 -0
  121. {gpustack_runner-0.1.22.post5 → gpustack_runner-0.1.23}/uv.toml +0 -0
@@ -57,3 +57,6 @@ coverage.xml
57
57
  *.cover
58
58
  *.py,cover
59
59
  .hypothesis/
60
+
61
+ # Test
62
+ pack/cann/mindie-*/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runner
3
- Version: 0.1.22.post5
3
+ Version: 0.1.23
4
4
  Summary: GPUStack Runner is a library for registering runnable accelerated backends and services in GPUStack.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runner
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -46,26 +46,24 @@ The following table lists the supported accelerated backends and their correspon
46
46
 
47
47
  ### Ascend CANN
48
48
 
49
+ > [!CAUTION]
50
+ > Since v0.1.23:
51
+ > - Deprecated MindIE `2.1.rc1`.
52
+
49
53
  > [!WARNING]
50
54
  > - The Atlas 300I series is currently experimental in vLLM, only supporting eager mode and float16 data type. And there
51
55
  are some known issues for running vLLM, you can refer to
52
56
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
53
57
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
54
58
 
55
- > [!IMPORTANT]
56
- > - Applied [ATB model patched](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3646603380) to MindIE 2.2.rc1/2.1.rc2.
57
- > - Applied [ATB config patched](https://github.com/gpustack/gpustack/issues/3551) to MindIE 2.2.rc1.
58
- > - Applied [av package](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3631228085) to MindIE 2.2.rc1/2.1.rc2.
59
- > - Update vLLM 0.11.0 with stable vLLM Ascend plugin.
60
-
61
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
62
- |------------------------------|--------------------------|--------------------------------------------------------------------------|------------------------|
63
- | 8.3 (A3/910C) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
64
- | 8.3 (910B) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
65
- | 8.3 (310P) | **`2.2.rc1`** | | |
66
- | 8.2 (A3/910C) | **`2.1.rc2`** | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1` | `0.5.2`, `0.5.1.post3` |
67
- | 8.2 (910B) | **`2.1.rc2`**, `2.1.rc1` | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1`, `0.10.0`, <br/>`0.9.2`, `0.9.1` | `0.5.2`, `0.5.1.post3` |
68
- | 8.2 (310P) | **`2.1.rc2`**, `2.1.rc1` | `0.10.0`, `0.9.2` | |
59
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
60
+ |------------------------------|--------------------------|------------------------------------------------------------|------------------------|
61
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
62
+ | 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
63
+ | 8.3 (310P) | `2.2.rc1` | | |
64
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
65
+ | 8.2 (910B) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
66
+ | 8.2 (310P) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.0`, `0.9.2` | |
69
67
 
70
68
  ### Iluvatar CoreX
71
69
 
@@ -75,6 +73,13 @@ The following table lists the supported accelerated backends and their correspon
75
73
 
76
74
  ### NVIDIA CUDA
77
75
 
76
+ > [!CAUTION]
77
+ > Since v0.1.23:
78
+ > - Deprecated all services for CUDA 12.4.
79
+ > - Deprecated vLLM `0.11.0`, `0.10.1.1`, `0.10.0`.
80
+ > - Deprecated SGLang `0.5.5`.
81
+ > - Deprecated VoxBox `0.0.20`.
82
+
78
83
  > [!NOTE]
79
84
  > - CUDA 12.9 supports Compute Capabilities:
80
85
  `7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX`.
@@ -83,16 +88,12 @@ The following table lists the supported accelerated backends and their correspon
83
88
  > - CUDA 12.6/12.4 supports Compute Capabilities:
84
89
  `7.5 8.0+PTX 8.9 9.0+PTX`.
85
90
 
86
- > [!IMPORTANT]
87
- > - Applied [Qwen2.5 VL patched](https://github.com/gpustack/gpustack/issues/3606) to vLLM 0.11.2.
88
- > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
89
-
90
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
91
- |------------------------------|-------------------------------------------------------------------------------------------|-----------------------------------------------------------|--------------------|
92
- | 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
93
- | 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
94
- | 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
95
- | 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
91
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
92
+ |------------------------------|---------------------------------------------------------------------------------------------------|---------------------------------------------------------------|------------------------|
93
+ | 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.6.post2` | |
94
+ | 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
95
+ | 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2` | `0.0.21`, ~~`0.0.20`~~ |
96
+ | 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
96
97
 
97
98
  ### Hygon DTK
98
99
 
@@ -109,6 +110,11 @@ The following table lists the supported accelerated backends and their correspon
109
110
 
110
111
  ### AMD ROCm
111
112
 
113
+ > [!CAUTION]
114
+ > Since v0.1.23:
115
+ > - Deprecated all services for ROCm 6.3.
116
+ > - Deprecated vLLM `0.11.0`.
117
+
112
118
  > [!NOTE]
113
119
  > - ROCm 7.0 supports LLVM targets:
114
120
  `gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1200 gfx1201 gfx1150 gfx1151`.
@@ -118,18 +124,15 @@ The following table lists the supported accelerated backends and their correspon
118
124
  > [!WARNING]
119
125
  > - ROCm 7.0 vLLM `0.11.2/0.11.0` are reusing the official ROCm 6.4 PyTorch 2.9 wheel package rather than a ROCm
120
126
  7.0 specific PyTorch build. Although ROCm 7.0 is supported in vLLM `0.11.2/0.11.0`, `gfx1150/gfx1151` are not supported yet.
121
- > - SGLang supports `gfx942` only.
122
127
  > - ROCm 6.4 vLLM `0.13.0` supports `gfx903 gfx90a gfx942` only.
123
-
124
- > [!IMPORTANT]
125
- > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
126
- > - Applied [petit-kernel package](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L728) to vLLM 0.11.2 and SGLang 0.5.5.post3.
127
-
128
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
129
- |------------------------------|-------------------------------------------------|----------------------------------|
130
- | 7.0 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0` | `0.5.6.post2` |
131
- | 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
132
- | 6.3 | `0.10.1.1`, `0.10.0` | |
128
+ > - ROCm 6.4 SGLang supports `gfx942` only.
129
+ > - ROCm 7.0 SGLang supports `gfx950` only.
130
+
131
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
+ |------------------------------|-------------------------------------------------|------------------------------|
133
+ | 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.6.post2` |
134
+ | 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.6.post2`, `0.5.5.post3` |
135
+ | 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
133
136
 
134
137
  ## Directory Structure
135
138
 
@@ -26,26 +26,24 @@ The following table lists the supported accelerated backends and their correspon
26
26
 
27
27
  ### Ascend CANN
28
28
 
29
+ > [!CAUTION]
30
+ > Since v0.1.23:
31
+ > - Deprecated MindIE `2.1.rc1`.
32
+
29
33
  > [!WARNING]
30
34
  > - The Atlas 300I series is currently experimental in vLLM, only supporting eager mode and float16 data type. And there
31
35
  are some known issues for running vLLM, you can refer to
32
36
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
33
37
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
34
38
 
35
- > [!IMPORTANT]
36
- > - Applied [ATB model patched](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3646603380) to MindIE 2.2.rc1/2.1.rc2.
37
- > - Applied [ATB config patched](https://github.com/gpustack/gpustack/issues/3551) to MindIE 2.2.rc1.
38
- > - Applied [av package](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3631228085) to MindIE 2.2.rc1/2.1.rc2.
39
- > - Update vLLM 0.11.0 with stable vLLM Ascend plugin.
40
-
41
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
42
- |------------------------------|--------------------------|--------------------------------------------------------------------------|------------------------|
43
- | 8.3 (A3/910C) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
44
- | 8.3 (910B) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
45
- | 8.3 (310P) | **`2.2.rc1`** | | |
46
- | 8.2 (A3/910C) | **`2.1.rc2`** | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1` | `0.5.2`, `0.5.1.post3` |
47
- | 8.2 (910B) | **`2.1.rc2`**, `2.1.rc1` | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1`, `0.10.0`, <br/>`0.9.2`, `0.9.1` | `0.5.2`, `0.5.1.post3` |
48
- | 8.2 (310P) | **`2.1.rc2`**, `2.1.rc1` | `0.10.0`, `0.9.2` | |
39
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
40
+ |------------------------------|--------------------------|------------------------------------------------------------|------------------------|
41
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
42
+ | 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
43
+ | 8.3 (310P) | `2.2.rc1` | | |
44
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
45
+ | 8.2 (910B) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
46
+ | 8.2 (310P) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.0`, `0.9.2` | |
49
47
 
50
48
  ### Iluvatar CoreX
51
49
 
@@ -55,6 +53,13 @@ The following table lists the supported accelerated backends and their correspon
55
53
 
56
54
  ### NVIDIA CUDA
57
55
 
56
+ > [!CAUTION]
57
+ > Since v0.1.23:
58
+ > - Deprecated all services for CUDA 12.4.
59
+ > - Deprecated vLLM `0.11.0`, `0.10.1.1`, `0.10.0`.
60
+ > - Deprecated SGLang `0.5.5`.
61
+ > - Deprecated VoxBox `0.0.20`.
62
+
58
63
  > [!NOTE]
59
64
  > - CUDA 12.9 supports Compute Capabilities:
60
65
  `7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX`.
@@ -63,16 +68,12 @@ The following table lists the supported accelerated backends and their correspon
63
68
  > - CUDA 12.6/12.4 supports Compute Capabilities:
64
69
  `7.5 8.0+PTX 8.9 9.0+PTX`.
65
70
 
66
- > [!IMPORTANT]
67
- > - Applied [Qwen2.5 VL patched](https://github.com/gpustack/gpustack/issues/3606) to vLLM 0.11.2.
68
- > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
69
-
70
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
71
- |------------------------------|-------------------------------------------------------------------------------------------|-----------------------------------------------------------|--------------------|
72
- | 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
73
- | 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
74
- | 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
75
- | 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
71
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
72
+ |------------------------------|---------------------------------------------------------------------------------------------------|---------------------------------------------------------------|------------------------|
73
+ | 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.6.post2` | |
74
+ | 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
75
+ | 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2` | `0.0.21`, ~~`0.0.20`~~ |
76
+ | 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
76
77
 
77
78
  ### Hygon DTK
78
79
 
@@ -89,6 +90,11 @@ The following table lists the supported accelerated backends and their correspon
89
90
 
90
91
  ### AMD ROCm
91
92
 
93
+ > [!CAUTION]
94
+ > Since v0.1.23:
95
+ > - Deprecated all services for ROCm 6.3.
96
+ > - Deprecated vLLM `0.11.0`.
97
+
92
98
  > [!NOTE]
93
99
  > - ROCm 7.0 supports LLVM targets:
94
100
  `gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1200 gfx1201 gfx1150 gfx1151`.
@@ -98,18 +104,15 @@ The following table lists the supported accelerated backends and their correspon
98
104
  > [!WARNING]
99
105
  > - ROCm 7.0 vLLM `0.11.2/0.11.0` are reusing the official ROCm 6.4 PyTorch 2.9 wheel package rather than a ROCm
100
106
  7.0 specific PyTorch build. Although ROCm 7.0 is supported in vLLM `0.11.2/0.11.0`, `gfx1150/gfx1151` are not supported yet.
101
- > - SGLang supports `gfx942` only.
102
107
  > - ROCm 6.4 vLLM `0.13.0` supports `gfx903 gfx90a gfx942` only.
103
-
104
- > [!IMPORTANT]
105
- > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
106
- > - Applied [petit-kernel package](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L728) to vLLM 0.11.2 and SGLang 0.5.5.post3.
107
-
108
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
109
- |------------------------------|-------------------------------------------------|----------------------------------|
110
- | 7.0 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0` | `0.5.6.post2` |
111
- | 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
112
- | 6.3 | `0.10.1.1`, `0.10.0` | |
108
+ > - ROCm 6.4 SGLang supports `gfx942` only.
109
+ > - ROCm 7.0 SGLang supports `gfx950` only.
110
+
111
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
112
+ |------------------------------|-------------------------------------------------|------------------------------|
113
+ | 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.6.post2` |
114
+ | 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.6.post2`, `0.5.5.post3` |
115
+ | 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
113
116
 
114
117
  ## Directory Structure
115
118
 
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.22.post5'
31
- __version_tuple__ = version_tuple = (0, 1, 22, 'post5')
30
+ __version__ = version = '0.1.23'
31
+ __version_tuple__ = version_tuple = (0, 1, 23)
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -0,0 +1 @@
1
+ git_commit = "60fcf6e"
@@ -46,6 +46,10 @@ _AVAILABLE_PLATFORMS = [
46
46
  ]
47
47
 
48
48
 
49
+ # Disable overriding default namespace at images operations.
50
+ os.environ["GPUSTACK_RUNNER_DEFAULT_NAMESPACE"] = "gpustack"
51
+
52
+
49
53
  class ListImagesSubCommand(SubCommand):
50
54
  """
51
55
  Command to list images.
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ from functools import lru_cache
4
+ from os import getenv as sys_getenv
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ if TYPE_CHECKING:
8
+ from collections.abc import Callable
9
+
10
+ # Global
11
+
12
+ GPUSTACK_RUNNER_DEFAULT_NAMESPACE: str | None = None
13
+ """
14
+ Namespace for default runner images.
15
+ If not set, it should be "gpustack".
16
+ """
17
+
18
+ # --8<-- [start:env-vars-definition]
19
+
20
+ variables: dict[str, Callable[[], Any]] = {
21
+ # Global
22
+ "GPUSTACK_RUNNER_DEFAULT_NAMESPACE": lambda: trim_str(
23
+ getenvs(
24
+ keys=[
25
+ "GPUSTACK_RUNNER_DEFAULT_NAMESPACE",
26
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_NAMESPACE", ## Compatible with gpustack/gpustack_runtime.
27
+ ],
28
+ ),
29
+ ),
30
+ }
31
+
32
+
33
+ # --8<-- [end:env-vars-definition]
34
+
35
+
36
+ @lru_cache
37
+ def __getattr__(name: str):
38
+ # lazy evaluation of environment variables
39
+ if name in variables:
40
+ return variables[name]()
41
+ msg = f"module {__name__} has no attribute {name}"
42
+ raise AttributeError(msg)
43
+
44
+
45
+ def __dir__():
46
+ return list(variables.keys())
47
+
48
+
49
+ def trim_str(value: str | None) -> str | None:
50
+ """
51
+ Trim leading and trailing whitespace from a string.
52
+
53
+ Args:
54
+ value:
55
+ The string to trim.
56
+
57
+ Returns:
58
+ The trimmed string, or None if the input is None.
59
+
60
+ """
61
+ if value is not None:
62
+ return value.strip()
63
+ return None
64
+
65
+
66
+ _ENV_PREFIX = "GPUSTACK_RUNNER_"
67
+
68
+
69
+ def getenv(key: str, default=None) -> any | None:
70
+ """
71
+ Get the value of an environment variable.
72
+ Try headless module variable if the key starts with "GPUSTACK_RUNNER_".
73
+
74
+ Args:
75
+ key:
76
+ The environment variable key.
77
+ default:
78
+ The default value if the key is not found.
79
+
80
+ Returns:
81
+ The value of the environment variable if it exists, otherwise the given default.
82
+
83
+ """
84
+ value = sys_getenv(key)
85
+ if value is not None:
86
+ return value
87
+ if key.startswith(_ENV_PREFIX):
88
+ headless_key = key.removeprefix(_ENV_PREFIX)
89
+ return sys_getenv(headless_key, default)
90
+ return default
91
+
92
+
93
+ def getenvs(keys: list[str], default=None) -> any | None:
94
+ """
95
+ Get the value of an environment variable.
96
+ Return the first found value among the provided keys.
97
+
98
+ Args:
99
+ keys:
100
+ The environment variable key(s).
101
+ default:
102
+ The default value if none of the keys are found.
103
+
104
+ Returns:
105
+ The first value found among the given keys, otherwise the given default.
106
+
107
+ """
108
+ for key in keys:
109
+ value = getenv(key)
110
+ if value is not None:
111
+ return value
112
+ return default
@@ -10,13 +10,15 @@ from typing import Any
10
10
 
11
11
  from dataclasses_json import dataclass_json
12
12
 
13
+ from . import envs
14
+
13
15
  _RE_DOCKER_IMAGE = re.compile(
14
- r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?gpustack/runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
16
+ r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
15
17
  )
16
18
  """
17
19
  Regex for Docker image parsing,
18
20
  which captures the following named groups:
19
- - `prefix`: The optional prefix before `gpustack/runner`, e.g. a registry URL or namespace.
21
+ - `prefix`: The optional prefix before `runner`, e.g. a registry URL or namespace.
20
22
  - `backend`: The backend name, e.g. "cann", "cuda", "rocm", etc.
21
23
  - `backend_version`: The backend version, ignored patch version, e.g. "8.2", "12.4", "6.3", etc.
22
24
  - `backend_variant`: The optional backend variant, e.g. "910b", etc.
@@ -33,7 +35,7 @@ def set_re_docker_image(pattern: str):
33
35
  Args:
34
36
  pattern:
35
37
  The regex pattern to set. It should capture the following named groups:
36
- - `prefix`: The optional prefix before `gpustack/runner`, e.g. a registry URL or namespace.
38
+ - `prefix`: The optional prefix before `runner`, e.g. a registry URL or namespace.
37
39
  - `backend`: The backend name, e.g. "cann", "cuda",
38
40
  - `backend_version`: The backend version, ignored patch version, e.g. "8.2", "12.4", "6.3", etc.
39
41
  - `backend_variant`: The optional backend variant, e.g. "910b", etc
@@ -82,7 +84,7 @@ class DockerImage:
82
84
  Parse the Docker image string into a DockerImage object.
83
85
 
84
86
  The given image string must follow the below regex format:
85
- `[prefix/]gpustack/runner:{backend}{backend_version}[-backend_variant]-{service}{service_version}[-suffix]`
87
+ `[prefix/]runner:{backend}{backend_version}[-backend_variant]-{service}{service_version}[-suffix]`
86
88
 
87
89
  Args:
88
90
  image:
@@ -100,7 +102,7 @@ class DockerImage:
100
102
  def __str__(self):
101
103
  parts = [
102
104
  "",
103
- "gpustack/runner:",
105
+ "runner:",
104
106
  self.backend,
105
107
  self.backend_version,
106
108
  ]
@@ -235,7 +237,13 @@ def list_runners(**kwargs) -> Runners | list[dict]:
235
237
  data_path = Path(_data_path) if isinstance(_data_path, str) else _data_path
236
238
  with data_path.open("r", encoding="utf-8") as f:
237
239
  json_list = json.load(f)
238
- runners = [Runner.from_dict(item) for item in json_list]
240
+ runners = []
241
+ for item in json_list:
242
+ if namespace := envs.GPUSTACK_RUNNER_DEFAULT_NAMESPACE:
243
+ docker_image = item["docker_image"]
244
+ docker_image = docker_image.replace("gpustack/", f"{namespace}/")
245
+ item["docker_image"] = docker_image
246
+ runners.append(Runner.from_dict(item))
239
247
 
240
248
  todict = kwargs.pop("todict", False)
241
249
  if not kwargs: