gpustack-runner 0.1.22.post3__tar.gz → 0.1.22.post5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/PKG-INFO +13 -12
  2. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/README.md +12 -11
  3. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/_version.py +2 -2
  4. gpustack_runner-0.1.22.post5/gpustack_runner/_version_appendix.py +1 -0
  5. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/runner.py.json +44 -0
  6. gpustack_runner-0.1.22.post5/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +25 -0
  7. gpustack_runner-0.1.22.post5/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +33 -0
  8. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/README.md +1 -0
  9. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/Dockerfile +3 -1
  10. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cuda/Dockerfile +1 -1
  11. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/matrix.yaml +0 -1
  12. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +44 -0
  13. gpustack_runner-0.1.22.post3/gpustack_runner/_version_appendix.py +0 -1
  14. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.codespelldict +0 -0
  15. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.codespellrc +0 -0
  16. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.gitattributes +0 -0
  17. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.gitignore +0 -0
  18. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.pre-commit-config.yaml +0 -0
  19. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/.python-version +0 -0
  20. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/LICENSE +0 -0
  21. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/Makefile +0 -0
  22. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/docs/index.md +0 -0
  23. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/docs/modules/gpustack_runner.md +0 -0
  24. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/__init__.py +0 -0
  25. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/__main__.py +0 -0
  26. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/_version.pyi +0 -0
  27. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/__init__.py +0 -0
  28. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/__types__.py +0 -0
  29. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/cmds/images.py +0 -0
  30. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/gpustack_runner/runner.py +0 -0
  31. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/hatch.toml +0 -0
  32. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/mkdocs.yml +0 -0
  33. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
  34. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
  35. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
  36. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
  37. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
  38. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
  39. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
  40. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
  41. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
  42. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
  43. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
  44. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
  45. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
  46. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  47. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
  48. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
  49. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
  50. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
  51. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
  52. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
  53. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
  54. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
  55. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
  56. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  57. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
  58. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
  59. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
  60. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
  61. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
  62. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
  63. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
  64. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
  65. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
  66. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
  67. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
  68. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
  69. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
  70. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
  71. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
  72. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
  73. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
  74. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
  75. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
  76. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
  77. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
  78. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
  79. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
  80. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
  81. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
  82. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
  83. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
  84. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
  85. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
  86. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  87. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  88. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/cann/patches/mindie.zip +0 -0
  89. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/corex/Dockerfile +0 -0
  90. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/discard_runner.sh +0 -0
  91. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/dtk/Dockerfile +0 -0
  92. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/expand_matrix.sh +0 -0
  93. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/maca/Dockerfile +0 -0
  94. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/merge_runner.sh +0 -0
  95. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/prune_runner.sh +0 -0
  96. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/rocm/Dockerfile +0 -0
  97. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
  98. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pyproject.toml +0 -0
  99. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/pytest.ini +0 -0
  100. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/ruff.toml +0 -0
  101. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/__init__.py +0 -0
  102. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
  103. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
  104. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
  105. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
  106. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tests/gpustack_runner/test_runner.py +0 -0
  107. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/activate +0 -0
  108. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat.sh +0 -0
  109. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_current_date_time.sh +0 -0
  110. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_get_temperature.sh +0 -0
  111. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_get_weather.sh +0 -0
  112. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_square_of_number.sh +0 -0
  113. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_square_root_of_number.sh +0 -0
  114. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/chat_tool_where_am_i.sh +0 -0
  115. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/run_runner.sh +0 -0
  116. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/tools/run_runner_cluster.sh +0 -0
  117. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/uv.lock +0 -0
  118. {gpustack_runner-0.1.22.post3 → gpustack_runner-0.1.22.post5}/uv.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runner
3
- Version: 0.1.22.post3
3
+ Version: 0.1.22.post5
4
4
  Summary: GPUStack Runner is a library for registering runnable accelerated backends and services in GPUStack.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runner
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -54,6 +54,7 @@ The following table lists the supported accelerated backends and their correspon
54
54
 
55
55
  > [!IMPORTANT]
56
56
  > - Applied [ATB model patched](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3646603380) to MindIE 2.2.rc1/2.1.rc2.
57
+ > - Applied [ATB config patched](https://github.com/gpustack/gpustack/issues/3551) to MindIE 2.2.rc1.
57
58
  > - Applied [av package](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3631228085) to MindIE 2.2.rc1/2.1.rc2.
58
59
  > - Update vLLM 0.11.0 with stable vLLM Ascend plugin.
59
60
 
@@ -86,12 +87,12 @@ The following table lists the supported accelerated backends and their correspon
86
87
  > - Applied [Qwen2.5 VL patched](https://github.com/gpustack/gpustack/issues/3606) to vLLM 0.11.2.
87
88
  > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
88
89
 
89
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
90
- |------------------------------|--------------------------------------------------------------------------------------|-----------------------------------------------------------|----------|
91
- | 12.9 | `0.13.0`, `0.12.0`, **`0.11.2`** | `0.5.6.post2` | |
92
- | 12.8 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/>`0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.20` |
93
- | 12.6 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/>`0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | `0.5.6.post2` | `0.0.20` |
94
- | 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
90
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
91
+ |------------------------------|-------------------------------------------------------------------------------------------|-----------------------------------------------------------|--------------------|
92
+ | 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
93
+ | 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
94
+ | 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
95
+ | 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
95
96
 
96
97
  ### Hygon DTK
97
98
 
@@ -124,11 +125,11 @@ The following table lists the supported accelerated backends and their correspon
124
125
  > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
125
126
  > - Applied [petit-kernel package](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L728) to vLLM 0.11.2 and SGLang 0.5.5.post3.
126
127
 
127
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
128
- |------------------------------|--------------------------------------------------|----------------------------------|
129
- | 7.0 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/> `0.11.0` | `0.5.6.post2` |
130
- | 6.4 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/> `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
131
- | 6.3 | `0.10.1.1`, `0.10.0` | |
128
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
129
+ |------------------------------|-------------------------------------------------|----------------------------------|
130
+ | 7.0 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0` | `0.5.6.post2` |
131
+ | 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
132
+ | 6.3 | `0.10.1.1`, `0.10.0` | |
132
133
 
133
134
  ## Directory Structure
134
135
 
@@ -34,6 +34,7 @@ The following table lists the supported accelerated backends and their correspon
34
34
 
35
35
  > [!IMPORTANT]
36
36
  > - Applied [ATB model patched](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3646603380) to MindIE 2.2.rc1/2.1.rc2.
37
+ > - Applied [ATB config patched](https://github.com/gpustack/gpustack/issues/3551) to MindIE 2.2.rc1.
37
38
  > - Applied [av package](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3631228085) to MindIE 2.2.rc1/2.1.rc2.
38
39
  > - Update vLLM 0.11.0 with stable vLLM Ascend plugin.
39
40
 
@@ -66,12 +67,12 @@ The following table lists the supported accelerated backends and their correspon
66
67
  > - Applied [Qwen2.5 VL patched](https://github.com/gpustack/gpustack/issues/3606) to vLLM 0.11.2.
67
68
  > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
68
69
 
69
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
70
- |------------------------------|--------------------------------------------------------------------------------------|-----------------------------------------------------------|----------|
71
- | 12.9 | `0.13.0`, `0.12.0`, **`0.11.2`** | `0.5.6.post2` | |
72
- | 12.8 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/>`0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.20` |
73
- | 12.6 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/>`0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | `0.5.6.post2` | `0.0.20` |
74
- | 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
70
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
71
+ |------------------------------|-------------------------------------------------------------------------------------------|-----------------------------------------------------------|--------------------|
72
+ | 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
73
+ | 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
74
+ | 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
75
+ | 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
75
76
 
76
77
  ### Hygon DTK
77
78
 
@@ -104,11 +105,11 @@ The following table lists the supported accelerated backends and their correspon
104
105
  > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
105
106
  > - Applied [petit-kernel package](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L728) to vLLM 0.11.2 and SGLang 0.5.5.post3.
106
107
 
107
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
108
- |------------------------------|--------------------------------------------------|----------------------------------|
109
- | 7.0 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/> `0.11.0` | `0.5.6.post2` |
110
- | 6.4 | `0.13.0`, `0.12.0`, **`0.11.2`**, <br/> `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
111
- | 6.3 | `0.10.1.1`, `0.10.0` | |
108
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
109
+ |------------------------------|-------------------------------------------------|----------------------------------|
110
+ | 7.0 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0` | `0.5.6.post2` |
111
+ | 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
112
+ | 6.3 | `0.10.1.1`, `0.10.0` | |
112
113
 
113
114
  ## Directory Structure
114
115
 
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.22.post3'
31
- __version_tuple__ = version_tuple = (0, 1, 22, 'post3')
30
+ __version__ = version = '0.1.22.post5'
31
+ __version_tuple__ = version_tuple = (0, 1, 22, 'post5')
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -0,0 +1 @@
1
+ git_commit = "18bd835"
@@ -901,6 +901,28 @@
901
901
  "docker_image": "gpustack/runner:cuda12.8-vllm0.10.0",
902
902
  "deprecated": false
903
903
  },
904
+ {
905
+ "backend": "cuda",
906
+ "backend_version": "12.8",
907
+ "original_backend_version": "12.8.1",
908
+ "backend_variant": "",
909
+ "service": "voxbox",
910
+ "service_version": "0.0.21",
911
+ "platform": "linux/amd64",
912
+ "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
913
+ "deprecated": false
914
+ },
915
+ {
916
+ "backend": "cuda",
917
+ "backend_version": "12.8",
918
+ "original_backend_version": "12.8.1",
919
+ "backend_variant": "",
920
+ "service": "voxbox",
921
+ "service_version": "0.0.21",
922
+ "platform": "linux/arm64",
923
+ "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
924
+ "deprecated": false
925
+ },
904
926
  {
905
927
  "backend": "cuda",
906
928
  "backend_version": "12.8",
@@ -1077,6 +1099,28 @@
1077
1099
  "docker_image": "gpustack/runner:cuda12.6-vllm0.10.0",
1078
1100
  "deprecated": false
1079
1101
  },
1102
+ {
1103
+ "backend": "cuda",
1104
+ "backend_version": "12.6",
1105
+ "original_backend_version": "12.6.3",
1106
+ "backend_variant": "",
1107
+ "service": "voxbox",
1108
+ "service_version": "0.0.21",
1109
+ "platform": "linux/amd64",
1110
+ "docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
1111
+ "deprecated": false
1112
+ },
1113
+ {
1114
+ "backend": "cuda",
1115
+ "backend_version": "12.6",
1116
+ "original_backend_version": "12.6.3",
1117
+ "backend_variant": "",
1118
+ "service": "voxbox",
1119
+ "service_version": "0.0.21",
1120
+ "platform": "linux/arm64",
1121
+ "docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
1122
+ "deprecated": false
1123
+ },
1080
1124
  {
1081
1125
  "backend": "cuda",
1082
1126
  "backend_version": "12.6",
@@ -0,0 +1,25 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CANN_VERSION=8.3
3
+ ARG CANN_ARCHS=910b
4
+ ARG MINDIE_VERSION=2.2.rc1
5
+
6
+ FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-mindie${MINDIE_VERSION} AS mindie
7
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
8
+
9
+ ARG TARGETPLATFORM
10
+ ARG TARGETOS
11
+ ARG TARGETARCH
12
+
13
+ ## Patch ATB Config
14
+
15
+ RUN <<EOF
16
+ # Patch ATB Config
17
+
18
+ sed -i "s/\"ep_level\": 2/\"ep_level\": 1/g" ${CANN_HOME}/atb-models/atb_llm/conf/config.json
19
+
20
+ EOF
21
+
22
+ ## Entrypoint
23
+
24
+ WORKDIR /
25
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,33 @@
1
+ rules:
2
+
3
+ #
4
+ # Ascend CANN
5
+ #
6
+
7
+ ## Packed Ascend CANN 8.3, using CANN Kernel for A3.
8
+ ##
9
+ - backend: "cann"
10
+ services:
11
+ - "mindie"
12
+ args:
13
+ - "CANN_VERSION=8.3"
14
+ - "CANN_ARCHS=a3"
15
+ - "MINDIE_VERSION=2.2.rc1"
16
+ ## Packed Ascend CANN 8.3, using CANN Kernel for 910B.
17
+ ##
18
+ - backend: "cann"
19
+ services:
20
+ - "mindie"
21
+ args:
22
+ - "CANN_VERSION=8.3"
23
+ - "CANN_ARCHS=910b"
24
+ - "MINDIE_VERSION=2.2.rc1"
25
+ ## Packed Ascend CANN 8.3, using CANN Kernel for 310P.
26
+ ##
27
+ - backend: "cann"
28
+ services:
29
+ - "mindie"
30
+ args:
31
+ - "CANN_VERSION=8.3"
32
+ - "CANN_ARCHS=310p"
33
+ - "MINDIE_VERSION=2.2.rc1"
@@ -31,3 +31,4 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
31
31
  - [x] 2025-12-16: Uninstall `runai-model-streamer` packages from SGLang 0.5.6.post2 for CUDA released images.
32
32
  - [x] 2025-12-19: Install `vLLM[audio]` packages for vLLM 0.12.0/0.11.2 of CUDA/ROCm released images.
33
33
  - [x] 2025-12-19: Install `petit-kernel` package for vLLM 0.12.0/0.11.2 and SGLang 0.5.6.post2/0.5.5.post3 of ROCm released images.
34
+ - [x] 2025-12-24: Apply ATB config patches to MindIE 2.2.rc1 for CANN released images.
@@ -583,8 +583,10 @@ RUN --mount=type=bind,target=/workspace,rw <<EOF
583
583
  # Patch
584
584
 
585
585
  unzip /workspace/patches/mindie.zip -d /workspace/patches
586
- pushd /usr/local/Ascend/atb-models \
586
+ pushd ${CANN_HOME}/atb-models \
587
587
  && patch -p1 < /workspace/patches/mindie/*.patch
588
+
589
+ sed -i "s/\"ep_level\": 2/\"ep_level\": 1/g" ${CANN_HOME}/atb-models/atb_llm/conf/config.json
588
590
  EOF
589
591
 
590
592
  ## Postprocess
@@ -95,7 +95,7 @@ ARG CMAKE_MAX_JOBS
95
95
  ARG CUDA_VERSION=12.9.1
96
96
  ARG CUDA_ARCHS
97
97
  ARG VOXBOX_BASE_IMAGE=gpustack/runner:cuda${CUDA_VERSION}-python${PYTHON_VERSION}
98
- ARG VOXBOX_VERSION=0.0.20
98
+ ARG VOXBOX_VERSION=0.0.21
99
99
  ARG VOXBOX_TORCH_VERSION=2.7.1
100
100
  ARG VOXBOX_TORCH_CUDA_VERSION=${CUDA_VERSION}
101
101
  ARG VLLM_BASE_IMAGE=gpustack/runner:cuda${CUDA_VERSION}-python${PYTHON_VERSION}
@@ -102,7 +102,6 @@ rules:
102
102
  ##
103
103
  - backend: "cuda"
104
104
  services:
105
- - "voxbox"
106
105
  - "vllm"
107
106
  - "sglang"
108
107
  args:
@@ -923,6 +923,28 @@
923
923
  "docker_image": "gpustack/runner:cuda12.8-vllm0.10.0",
924
924
  "deprecated": false
925
925
  },
926
+ {
927
+ "backend": "cuda",
928
+ "backend_version": "12.8",
929
+ "original_backend_version": "12.8.1",
930
+ "backend_variant": "",
931
+ "service": "voxbox",
932
+ "service_version": "0.0.21",
933
+ "platform": "linux/amd64",
934
+ "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
935
+ "deprecated": false
936
+ },
937
+ {
938
+ "backend": "cuda",
939
+ "backend_version": "12.8",
940
+ "original_backend_version": "12.8.1",
941
+ "backend_variant": "",
942
+ "service": "voxbox",
943
+ "service_version": "0.0.21",
944
+ "platform": "linux/arm64",
945
+ "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.21",
946
+ "deprecated": false
947
+ },
926
948
  {
927
949
  "backend": "cuda",
928
950
  "backend_version": "12.8",
@@ -1099,6 +1121,28 @@
1099
1121
  "docker_image": "gpustack/runner:cuda12.6-vllm0.10.0",
1100
1122
  "deprecated": false
1101
1123
  },
1124
+ {
1125
+ "backend": "cuda",
1126
+ "backend_version": "12.6",
1127
+ "original_backend_version": "12.6.3",
1128
+ "backend_variant": "",
1129
+ "service": "voxbox",
1130
+ "service_version": "0.0.21",
1131
+ "platform": "linux/amd64",
1132
+ "docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
1133
+ "deprecated": false
1134
+ },
1135
+ {
1136
+ "backend": "cuda",
1137
+ "backend_version": "12.6",
1138
+ "original_backend_version": "12.6.3",
1139
+ "backend_variant": "",
1140
+ "service": "voxbox",
1141
+ "service_version": "0.0.21",
1142
+ "platform": "linux/arm64",
1143
+ "docker_image": "gpustack/runner:cuda12.6-voxbox0.0.21",
1144
+ "deprecated": false
1145
+ },
1102
1146
  {
1103
1147
  "backend": "cuda",
1104
1148
  "backend_version": "12.6",
@@ -1 +0,0 @@
1
- git_commit = "c9f91c3"