gpustack-runner 0.1.24.post2__tar.gz → 0.1.24.post3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/PKG-INFO +21 -21
  2. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/README.md +20 -20
  3. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/_version.py +2 -2
  4. gpustack_runner-0.1.24.post3/gpustack_runner/_version_appendix.py +1 -0
  5. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/runner.py.json +117 -7
  6. gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_sglang_reinstall_kernel/cann/Dockerfile +74 -0
  7. gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_sglang_reinstall_kernel/matrix.yaml +28 -0
  8. gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/cuda/Dockerfile +25 -0
  9. gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/cuda/patches/vllm_001_wrong_dp_ray.patch +41 -0
  10. gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/matrix.yaml +55 -0
  11. gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/rocm/Dockerfile +25 -0
  12. gpustack_runner-0.1.24.post3/pack/.post_operation/20260129_vllm_patch_dp/rocm/patches/vllm_001_wrong_dp_ray.patch +41 -0
  13. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/README.md +2 -0
  14. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/Dockerfile +18 -3
  15. gpustack_runner-0.1.24.post3/pack/cann/patches/vllm_001_wrong_dp_ray.patch +41 -0
  16. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cuda/Dockerfile +23 -10
  17. gpustack_runner-0.1.24.post3/pack/cuda/patches/vllm_001_wrong_dp_ray.patch +41 -0
  18. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/matrix.yaml +17 -17
  19. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/rocm/Dockerfile +22 -14
  20. gpustack_runner-0.1.24.post3/pack/rocm/patches/vllm_001_wrong_dp_ray.patch +41 -0
  21. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +117 -7
  22. gpustack_runner-0.1.24.post2/gpustack_runner/_version_appendix.py +0 -1
  23. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.codespelldict +0 -0
  24. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.codespellrc +0 -0
  25. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.gitattributes +0 -0
  26. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.gitignore +0 -0
  27. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.pre-commit-config.yaml +0 -0
  28. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/.python-version +0 -0
  29. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/LICENSE +0 -0
  30. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/Makefile +0 -0
  31. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/docs/index.md +0 -0
  32. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/docs/modules/gpustack_runner.md +0 -0
  33. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/__init__.py +0 -0
  34. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/__main__.py +0 -0
  35. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/__utils__.py +0 -0
  36. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/_version.pyi +0 -0
  37. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/cmds/__init__.py +0 -0
  38. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/cmds/__types__.py +0 -0
  39. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/cmds/images.py +0 -0
  40. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/envs.py +0 -0
  41. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/gpustack_runner/runner.py +0 -0
  42. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/hatch.toml +0 -0
  43. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/mkdocs.yml +0 -0
  44. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
  45. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
  46. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
  47. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
  48. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
  49. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
  50. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
  51. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
  52. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
  53. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
  54. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
  55. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
  56. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
  57. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  58. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
  59. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
  60. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
  61. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
  62. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
  63. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
  64. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
  65. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
  66. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
  67. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  68. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
  69. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
  70. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
  71. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
  72. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
  73. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
  74. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
  75. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
  76. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
  77. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
  78. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
  79. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
  80. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
  81. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
  82. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
  83. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
  84. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
  85. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
  86. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
  87. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
  88. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
  89. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
  90. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
  91. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
  92. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
  93. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
  94. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
  95. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
  96. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
  97. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
  98. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
  99. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +0 -0
  100. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +0 -0
  101. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +0 -0
  102. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +0 -0
  103. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/mindie-atb-models_2.3.0_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  104. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/mindie-atb-models_2.3.0_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  105. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/cann/patches/mindie.zip +0 -0
  106. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/corex/Dockerfile +0 -0
  107. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/discard_runner.sh +0 -0
  108. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/dtk/Dockerfile +0 -0
  109. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/expand_matrix.sh +0 -0
  110. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/hggc/Dockerfile +0 -0
  111. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/maca/Dockerfile +0 -0
  112. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/merge_runner.sh +0 -0
  113. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/musa/Dockerfile +0 -0
  114. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/prune_runner.sh +0 -0
  115. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
  116. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/squash_expand_matrix.sh +0 -0
  117. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pack/squash_image.sh +0 -0
  118. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pyproject.toml +0 -0
  119. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/pytest.ini +0 -0
  120. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/ruff.toml +0 -0
  121. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/__init__.py +0 -0
  122. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
  123. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
  124. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
  125. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
  126. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_merge_image.json +0 -0
  127. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_replace_image_with.json +0 -0
  128. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/fixtures/test_split_image.json +0 -0
  129. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/test_runner.py +0 -0
  130. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tests/gpustack_runner/test_utils.py +0 -0
  131. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/activate +0 -0
  132. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat.sh +0 -0
  133. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_current_date_time.sh +0 -0
  134. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_get_temperature.sh +0 -0
  135. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_get_weather.sh +0 -0
  136. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_square_of_number.sh +0 -0
  137. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_square_root_of_number.sh +0 -0
  138. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/chat_tool_where_am_i.sh +0 -0
  139. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/run_runner.sh +0 -0
  140. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/tools/run_runner_cluster.sh +0 -0
  141. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/uv.lock +0 -0
  142. {gpustack_runner-0.1.24.post2 → gpustack_runner-0.1.24.post3}/uv.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runner
3
- Version: 0.1.24.post2
3
+ Version: 0.1.24.post3
4
4
  Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runner
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -52,17 +52,17 @@ The following table lists the supported accelerated backends and their correspon
52
52
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
53
53
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
54
54
 
55
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
56
- |------------------------------|-----------|------------------------------------------------------------|------------------------|
57
- | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
58
- | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
59
- | 8.5 (310P) | `2.3.0` | `0.14.1` | |
60
- | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
61
- | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
62
- | 8.3 (310P) | `2.2.rc1` | | |
63
- | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
64
- | 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
65
- | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
55
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
56
+ |------------------------------|-----------|--------------------------------------------------------------------|------------------------|
57
+ | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
58
+ | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
59
+ | 8.5 (310P) | `2.3.0` | `0.14.1` | |
60
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
61
+ | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
62
+ | 8.3 (310P) | `2.2.rc1` | | |
63
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
64
+ | 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
65
+ | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
66
66
 
67
67
  ### Iluvatar CoreX
68
68
 
@@ -80,11 +80,11 @@ The following table lists the supported accelerated backends and their correspon
80
80
  > - CUDA 12.6/12.4 supports Compute Capabilities:
81
81
  `7.5 8.0+PTX 8.9 9.0+PTX`.
82
82
 
83
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
84
- |------------------------------|---------------------------------------------|-----------------------------------------------------------|----------|
85
- | 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` | |
86
- | 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3`, `0.5.4.post3` | `0.0.21` |
87
- | 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`,`0.10.2` | | `0.0.21` |
83
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
84
+ |------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
85
+ | 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
86
+ | 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
87
+ | 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
88
88
 
89
89
  ### Hygon DTK
90
90
 
@@ -128,10 +128,10 @@ The following table lists the supported accelerated backends and their correspon
128
128
  > - ROCm 6.4 SGLang supports `gfx942` only.
129
129
  > - ROCm 7.0 SGLang supports `gfx950` only.
130
130
 
131
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
- |------------------------------|---------------------------------------------|--------------------------------------------|
133
- | 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
134
- | 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
131
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
+ |------------------------------|-------------------------------------------------|--------------------------------------------|
133
+ | 7.0 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
134
+ | 6.4 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
135
135
 
136
136
  ## Directory Structure
137
137
 
@@ -32,17 +32,17 @@ The following table lists the supported accelerated backends and their correspon
32
32
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
33
33
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
34
34
 
35
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
36
- |------------------------------|-----------|------------------------------------------------------------|------------------------|
37
- | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
38
- | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
39
- | 8.5 (310P) | `2.3.0` | `0.14.1` | |
40
- | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
41
- | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
42
- | 8.3 (310P) | `2.2.rc1` | | |
43
- | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
44
- | 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
45
- | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
35
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
36
+ |------------------------------|-----------|--------------------------------------------------------------------|------------------------|
37
+ | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
38
+ | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
39
+ | 8.5 (310P) | `2.3.0` | `0.14.1` | |
40
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
41
+ | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
42
+ | 8.3 (310P) | `2.2.rc1` | | |
43
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
44
+ | 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
45
+ | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
46
46
 
47
47
  ### Iluvatar CoreX
48
48
 
@@ -60,11 +60,11 @@ The following table lists the supported accelerated backends and their correspon
60
60
  > - CUDA 12.6/12.4 supports Compute Capabilities:
61
61
  `7.5 8.0+PTX 8.9 9.0+PTX`.
62
62
 
63
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
64
- |------------------------------|---------------------------------------------|-----------------------------------------------------------|----------|
65
- | 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` | |
66
- | 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3`, `0.5.4.post3` | `0.0.21` |
67
- | 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`,`0.10.2` | | `0.0.21` |
63
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
64
+ |------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
65
+ | 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
66
+ | 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
67
+ | 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
68
68
 
69
69
  ### Hygon DTK
70
70
 
@@ -108,10 +108,10 @@ The following table lists the supported accelerated backends and their correspon
108
108
  > - ROCm 6.4 SGLang supports `gfx942` only.
109
109
  > - ROCm 7.0 SGLang supports `gfx950` only.
110
110
 
111
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
112
- |------------------------------|---------------------------------------------|--------------------------------------------|
113
- | 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
114
- | 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
111
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
112
+ |------------------------------|-------------------------------------------------|--------------------------------------------|
113
+ | 7.0 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
114
+ | 6.4 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
115
115
 
116
116
  ## Directory Structure
117
117
 
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.24.post2'
31
- __version_tuple__ = version_tuple = (0, 1, 24, 'post2')
30
+ __version__ = version = '0.1.24.post3'
31
+ __version_tuple__ = version_tuple = (0, 1, 24, 'post3')
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -0,0 +1 @@
1
+ git_commit = "dc41ed2"
@@ -261,7 +261,7 @@
261
261
  "service_version": "0.10.1.1",
262
262
  "platform": "linux/amd64",
263
263
  "docker_image": "gpustack/runner:cann8.2-a3-vllm0.10.1.1",
264
- "deprecated": false
264
+ "deprecated": true
265
265
  },
266
266
  {
267
267
  "backend": "cann",
@@ -272,7 +272,7 @@
272
272
  "service_version": "0.10.1.1",
273
273
  "platform": "linux/arm64",
274
274
  "docker_image": "gpustack/runner:cann8.2-a3-vllm0.10.1.1",
275
- "deprecated": false
275
+ "deprecated": true
276
276
  },
277
277
  {
278
278
  "backend": "cann",
@@ -558,7 +558,7 @@
558
558
  "service_version": "0.10.1.1",
559
559
  "platform": "linux/amd64",
560
560
  "docker_image": "gpustack/runner:cann8.2-910b-vllm0.10.1.1",
561
- "deprecated": false
561
+ "deprecated": true
562
562
  },
563
563
  {
564
564
  "backend": "cann",
@@ -569,7 +569,7 @@
569
569
  "service_version": "0.10.1.1",
570
570
  "platform": "linux/arm64",
571
571
  "docker_image": "gpustack/runner:cann8.2-910b-vllm0.10.1.1",
572
- "deprecated": false
572
+ "deprecated": true
573
573
  },
574
574
  {
575
575
  "backend": "cann",
@@ -624,7 +624,7 @@
624
624
  "service_version": "0.9.1",
625
625
  "platform": "linux/amd64",
626
626
  "docker_image": "gpustack/runner:cann8.2-910b-vllm0.9.1",
627
- "deprecated": false
627
+ "deprecated": true
628
628
  },
629
629
  {
630
630
  "backend": "cann",
@@ -635,7 +635,7 @@
635
635
  "service_version": "0.9.1",
636
636
  "platform": "linux/arm64",
637
637
  "docker_image": "gpustack/runner:cann8.2-910b-vllm0.9.1",
638
- "deprecated": false
638
+ "deprecated": true
639
639
  },
640
640
  {
641
641
  "backend": "cann",
@@ -802,6 +802,28 @@
802
802
  "docker_image": "gpustack/runner:corex4.2-vllm0.8.3",
803
803
  "deprecated": false
804
804
  },
805
+ {
806
+ "backend": "cuda",
807
+ "backend_version": "12.9",
808
+ "original_backend_version": "12.9.1",
809
+ "backend_variant": "",
810
+ "service": "sglang",
811
+ "service_version": "0.5.8",
812
+ "platform": "linux/amd64",
813
+ "docker_image": "gpustack/runner:cuda12.9-sglang0.5.8",
814
+ "deprecated": false
815
+ },
816
+ {
817
+ "backend": "cuda",
818
+ "backend_version": "12.9",
819
+ "original_backend_version": "12.9.1",
820
+ "backend_variant": "",
821
+ "service": "sglang",
822
+ "service_version": "0.5.8",
823
+ "platform": "linux/arm64",
824
+ "docker_image": "gpustack/runner:cuda12.9-sglang0.5.8",
825
+ "deprecated": false
826
+ },
805
827
  {
806
828
  "backend": "cuda",
807
829
  "backend_version": "12.9",
@@ -846,6 +868,28 @@
846
868
  "docker_image": "gpustack/runner:cuda12.9-sglang0.5.6.post2",
847
869
  "deprecated": false
848
870
  },
871
+ {
872
+ "backend": "cuda",
873
+ "backend_version": "12.9",
874
+ "original_backend_version": "12.9.1",
875
+ "backend_variant": "",
876
+ "service": "vllm",
877
+ "service_version": "0.14.1",
878
+ "platform": "linux/amd64",
879
+ "docker_image": "gpustack/runner:cuda12.9-vllm0.14.1",
880
+ "deprecated": false
881
+ },
882
+ {
883
+ "backend": "cuda",
884
+ "backend_version": "12.9",
885
+ "original_backend_version": "12.9.1",
886
+ "backend_variant": "",
887
+ "service": "vllm",
888
+ "service_version": "0.14.1",
889
+ "platform": "linux/arm64",
890
+ "docker_image": "gpustack/runner:cuda12.9-vllm0.14.1",
891
+ "deprecated": false
892
+ },
849
893
  {
850
894
  "backend": "cuda",
851
895
  "backend_version": "12.9",
@@ -912,6 +956,28 @@
912
956
  "docker_image": "gpustack/runner:cuda12.9-vllm0.11.2",
913
957
  "deprecated": false
914
958
  },
959
+ {
960
+ "backend": "cuda",
961
+ "backend_version": "12.8",
962
+ "original_backend_version": "12.8.1",
963
+ "backend_variant": "",
964
+ "service": "sglang",
965
+ "service_version": "0.5.8",
966
+ "platform": "linux/amd64",
967
+ "docker_image": "gpustack/runner:cuda12.8-sglang0.5.8",
968
+ "deprecated": false
969
+ },
970
+ {
971
+ "backend": "cuda",
972
+ "backend_version": "12.8",
973
+ "original_backend_version": "12.8.1",
974
+ "backend_variant": "",
975
+ "service": "sglang",
976
+ "service_version": "0.5.8",
977
+ "platform": "linux/arm64",
978
+ "docker_image": "gpustack/runner:cuda12.8-sglang0.5.8",
979
+ "deprecated": false
980
+ },
915
981
  {
916
982
  "backend": "cuda",
917
983
  "backend_version": "12.8",
@@ -998,7 +1064,7 @@
998
1064
  "service_version": "0.5.4.post3",
999
1065
  "platform": "linux/amd64",
1000
1066
  "docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
1001
- "deprecated": false
1067
+ "deprecated": true
1002
1068
  },
1003
1069
  {
1004
1070
  "backend": "cuda",
@@ -1009,6 +1075,28 @@
1009
1075
  "service_version": "0.5.4.post3",
1010
1076
  "platform": "linux/arm64",
1011
1077
  "docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
1078
+ "deprecated": true
1079
+ },
1080
+ {
1081
+ "backend": "cuda",
1082
+ "backend_version": "12.8",
1083
+ "original_backend_version": "12.8.1",
1084
+ "backend_variant": "",
1085
+ "service": "vllm",
1086
+ "service_version": "0.14.1",
1087
+ "platform": "linux/amd64",
1088
+ "docker_image": "gpustack/runner:cuda12.8-vllm0.14.1",
1089
+ "deprecated": false
1090
+ },
1091
+ {
1092
+ "backend": "cuda",
1093
+ "backend_version": "12.8",
1094
+ "original_backend_version": "12.8.1",
1095
+ "backend_variant": "",
1096
+ "service": "vllm",
1097
+ "service_version": "0.14.1",
1098
+ "platform": "linux/arm64",
1099
+ "docker_image": "gpustack/runner:cuda12.8-vllm0.14.1",
1012
1100
  "deprecated": false
1013
1101
  },
1014
1102
  {
@@ -1209,6 +1297,28 @@
1209
1297
  "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
1210
1298
  "deprecated": true
1211
1299
  },
1300
+ {
1301
+ "backend": "cuda",
1302
+ "backend_version": "12.6",
1303
+ "original_backend_version": "12.6.3",
1304
+ "backend_variant": "",
1305
+ "service": "vllm",
1306
+ "service_version": "0.14.1",
1307
+ "platform": "linux/amd64",
1308
+ "docker_image": "gpustack/runner:cuda12.6-vllm0.14.1",
1309
+ "deprecated": false
1310
+ },
1311
+ {
1312
+ "backend": "cuda",
1313
+ "backend_version": "12.6",
1314
+ "original_backend_version": "12.6.3",
1315
+ "backend_variant": "",
1316
+ "service": "vllm",
1317
+ "service_version": "0.14.1",
1318
+ "platform": "linux/arm64",
1319
+ "docker_image": "gpustack/runner:cuda12.6-vllm0.14.1",
1320
+ "deprecated": false
1321
+ },
1212
1322
  {
1213
1323
  "backend": "cuda",
1214
1324
  "backend_version": "12.6",
@@ -0,0 +1,74 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CANN_VERSION=8.3
3
+ ARG CANN_ARCHS=910b
4
+ ARG SGLANG_VERSION=0.12.0
5
+ ARG SGLANG_KERNEL_VERSION=20251206
6
+
7
+ FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-sglang${SGLANG_VERSION} AS sglang
8
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
9
+
10
+ ARG TARGETPLATFORM
11
+ ARG TARGETOS
12
+ ARG TARGETARCH
13
+
14
+ ## Reinstall SGLang Kernel
15
+
16
+ ARG CMAKE_MAX_JOBS
17
+ ARG SGLANG_VERSION
18
+ ARG SGLANG_KERNEL_VERSION
19
+
20
+ ENV SGLANG_VERSION=${SGLANG_VERSION} \
21
+ SGLANG_KERNEL_VERSION=${SGLANG_KERNEL_VERSION}
22
+
23
+ RUN <<EOF
24
+ # SGLang
25
+
26
+ CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
27
+ if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
28
+ CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
29
+ fi
30
+ if (( $(echo "${CMAKE_MAX_JOBS} > 8" | bc -l) )); then
31
+ CMAKE_MAX_JOBS="8"
32
+ fi
33
+ export MAX_JOBS="${CMAKE_MAX_JOBS}"
34
+ export COMPILE_CUSTOM_KERNELS=1
35
+ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CANN_HOME}/ascend-toolkit/latest/$(uname -i)-linux/devlib"
36
+ export LD_LIBRARY_PATH="${CANN_HOME}/ascend-toolkit/latest/runtime/lib64/stub:${LD_LIBRARY_PATH}"
37
+ source ${CANN_HOME}/ascend-toolkit/set_env.sh
38
+ echo "Building SGLang with the following environment variables:"
39
+ env
40
+
41
+ # Install Dependencies
42
+ cat <<EOT >/tmp/requirements.txt
43
+ attrs==25.4.0
44
+ decorator==5.2.1
45
+ psutil==7.1.3
46
+ pyyaml==6.0.3
47
+ triton-ascend==3.2.0
48
+ EOT
49
+ uv pip install \
50
+ -r /tmp/requirements.txt
51
+
52
+ # Build and Install SGLang Kernel
53
+ git -C /tmp clone --recursive --shallow-submodules \
54
+ --depth 1 --branch ${SGLANG_KERNEL_VERSION} --single-branch \
55
+ https://github.com/sgl-project/sgl-kernel-npu.git sgl-kernel-npu
56
+ unset ASCEND_HOME_PATH
57
+ pushd /tmp/sgl-kernel-npu \
58
+ && ./build.sh \
59
+ && tree -hs /tmp/sgl-kernel-npu/output \
60
+ && uv pip install /tmp/sgl-kernel-npu/output/deep_ep*.whl /tmp/sgl-kernel-npu/output/sgl_kernel_npu*.whl
61
+
62
+ # Postprocess SGLang Kernel (DeepEP)
63
+ cd "$(pip show deep-ep | awk '/^Location:/ {print $2}')" && ln -sf deep_ep/deep_ep_cpp*.so
64
+
65
+ # Cleanup
66
+ rm -rf /var/tmp/* \
67
+ && rm -rf /tmp/* \
68
+ && ccache --clear --clean
69
+ EOF
70
+
71
+ ## Entrypoint
72
+
73
+ WORKDIR /
74
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,28 @@
1
+ rules:
2
+
3
+ #
4
+ # Ascend CANN
5
+ #
6
+
7
+ ## Packed Ascend CANN 8.3, using CANN Kernel for A3.
8
+ ##
9
+ - backend: "cann"
10
+ services:
11
+ - "sglang"
12
+ platforms:
13
+ - "linux/arm64"
14
+ args:
15
+ - "CANN_VERSION=8.3"
16
+ - "CANN_ARCHS=a3"
17
+ - "SGLANG_VERSION=0.5.7"
18
+ ## Packed Ascend CANN 8.3, using CANN Kernel for 910B.
19
+ ##
20
+ - backend: "cann"
21
+ services:
22
+ - "sglang"
23
+ platforms:
24
+ - "linux/arm64"
25
+ args:
26
+ - "CANN_VERSION=8.3"
27
+ - "CANN_ARCHS=910b"
28
+ - "SGLANG_VERSION=0.5.7"
@@ -0,0 +1,25 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CUDA_VERSION=12.8
3
+ ARG VLLM_VERSION=0.13.0
4
+
5
+ FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
6
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
7
+
8
+ ARG TARGETPLATFORM
9
+ ARG TARGETOS
10
+ ARG TARGETARCH
11
+
12
+ ## Patch
13
+
14
+ RUN --mount=type=bind,target=/workspace,rw <<EOF
15
+ # Patch
16
+
17
+ tree -hs /workspace/patches
18
+ pushd $(pip show vllm | grep Location: | cut -d" " -f 2) \
19
+ && patch -p1 < /workspace/patches/vllm_*.patch
20
+ EOF
21
+
22
+ ## Entrypoint
23
+
24
+ WORKDIR /
25
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,41 @@
1
+ diff --git a/vllm/utils/network_utils.py b/vllm/utils/network_utils.py
2
+ index 7d01533cb..311ed44df 100644
3
+ --- a/vllm/utils/network_utils.py
4
+ +++ b/vllm/utils/network_utils.py
5
+ @@ -147,6 +147,9 @@ def get_open_zmq_inproc_path() -> str:
6
+ return f"inproc://{uuid4()}"
7
+
8
+
9
+ +_next_port: int | None = None
10
+ +
11
+ +
12
+ def get_open_port() -> int:
13
+ """
14
+ Get an open port for the vLLM process to listen on.
15
+ @@ -163,7 +166,7 @@ def get_open_port() -> int:
16
+ candidate_port = _get_open_port()
17
+ if candidate_port not in reserved_port_range:
18
+ return candidate_port
19
+ - return _get_open_port()
20
+ + return _get_open_port(_next_port)
21
+
22
+
23
+ def get_open_ports_list(count: int = 5) -> list[int]:
24
+ @@ -174,13 +177,15 @@ def get_open_ports_list(count: int = 5) -> list[int]:
25
+ return list(ports)
26
+
27
+
28
+ -def _get_open_port() -> int:
29
+ - port = envs.VLLM_PORT
30
+ +def _get_open_port(start: int | None = None) -> int:
31
+ + port = start or envs.VLLM_PORT
32
+ if port is not None:
33
+ + global _next_port
34
+ while True:
35
+ try:
36
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
37
+ s.bind(("", port))
38
+ + _next_port = port + 1
39
+ return port
40
+ except OSError:
41
+ port += 1 # Increment port number if already in use
@@ -0,0 +1,55 @@
1
+ rules:
2
+
3
+ #
4
+ # NVIDIA CUDA
5
+ #
6
+
7
+ ## Packed NVIDIA CUDA 12.9.
8
+ ##
9
+ - backend: "cuda"
10
+ services:
11
+ - "vllm"
12
+ args:
13
+ - "CUDA_VERSION=12.9"
14
+ - "VLLM_VERSION=0.13.0"
15
+ ## Packed NVIDIA CUDA 12.8.
16
+ ##
17
+ - backend: "cuda"
18
+ services:
19
+ - "vllm"
20
+ args:
21
+ - "CUDA_VERSION=12.8"
22
+ - "VLLM_VERSION=0.13.0"
23
+ ## Packed NVIDIA CUDA 12.6.
24
+ ##
25
+ - backend: "cuda"
26
+ services:
27
+ - "vllm"
28
+ args:
29
+ - "CUDA_VERSION=12.6"
30
+ - "VLLM_VERSION=0.13.0"
31
+
32
+ #
33
+ # AMD ROCm
34
+ #
35
+
36
+ ## Packed AMD ROCm 7.0.
37
+ ##
38
+ - backend: "rocm"
39
+ services:
40
+ - "vllm"
41
+ platforms:
42
+ - "linux/amd64"
43
+ args:
44
+ - "ROCM_VERSION=7.0"
45
+ - "VLLM_VERSION=0.13.0"
46
+ ## Packed AMD ROCm 6.4.
47
+ ##
48
+ - backend: "rocm"
49
+ services:
50
+ - "vllm"
51
+ platforms:
52
+ - "linux/amd64"
53
+ args:
54
+ - "ROCM_VERSION=6.4"
55
+ - "VLLM_VERSION=0.13.0"
@@ -0,0 +1,25 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG ROCM_VERSION=6.4
3
+ ARG VLLM_VERSION=0.13.0
4
+
5
+ FROM gpustack/runner:rocm${ROCM_VERSION}-vllm${VLLM_VERSION} AS vllm
6
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
7
+
8
+ ARG TARGETPLATFORM
9
+ ARG TARGETOS
10
+ ARG TARGETARCH
11
+
12
+ ## Patch
13
+
14
+ RUN --mount=type=bind,target=/workspace,rw <<EOF
15
+ # Patch
16
+
17
+ tree -hs /workspace/patches
18
+ pushd $(pip show vllm | grep Location: | cut -d" " -f 2) \
19
+ && patch -p1 < /workspace/patches/vllm_*.patch
20
+ EOF
21
+
22
+ ## Entrypoint
23
+
24
+ WORKDIR /
25
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,41 @@
1
+ diff --git a/vllm/utils/network_utils.py b/vllm/utils/network_utils.py
2
+ index 7d01533cb..311ed44df 100644
3
+ --- a/vllm/utils/network_utils.py
4
+ +++ b/vllm/utils/network_utils.py
5
+ @@ -147,6 +147,9 @@ def get_open_zmq_inproc_path() -> str:
6
+ return f"inproc://{uuid4()}"
7
+
8
+
9
+ +_next_port: int | None = None
10
+ +
11
+ +
12
+ def get_open_port() -> int:
13
+ """
14
+ Get an open port for the vLLM process to listen on.
15
+ @@ -163,7 +166,7 @@ def get_open_port() -> int:
16
+ candidate_port = _get_open_port()
17
+ if candidate_port not in reserved_port_range:
18
+ return candidate_port
19
+ - return _get_open_port()
20
+ + return _get_open_port(_next_port)
21
+
22
+
23
+ def get_open_ports_list(count: int = 5) -> list[int]:
24
+ @@ -174,13 +177,15 @@ def get_open_ports_list(count: int = 5) -> list[int]:
25
+ return list(ports)
26
+
27
+
28
+ -def _get_open_port() -> int:
29
+ - port = envs.VLLM_PORT
30
+ +def _get_open_port(start: int | None = None) -> int:
31
+ + port = start or envs.VLLM_PORT
32
+ if port is not None:
33
+ + global _next_port
34
+ while True:
35
+ try:
36
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
37
+ s.bind(("", port))
38
+ + _next_port = port + 1
39
+ return port
40
+ except OSError:
41
+ port += 1 # Increment port number if already in use
@@ -33,3 +33,5 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
33
33
  - [x] 2025-12-19: Install `petit-kernel` package for vLLM 0.12.0/0.11.2 and SGLang 0.5.6.post2/0.5.5.post3 of ROCm released images.
34
34
  - [x] 2025-12-24: Apply ATB config patches to MindIE 2.2.rc1 for CANN released images.
35
35
  - [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.
36
+ - [x] 2026-01-29: Apply DP deployment patches to vLLM 0.13.0 for CUDA/ROCm released images.
37
+ - [x] 2026-01-29: Reinstall SGLang Kernel for SGLang 0.5.7 of CANN released images.