gpustack-runner 0.1.24.post4__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/PKG-INFO +21 -21
  2. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/README.md +20 -20
  3. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/_version.py +2 -2
  4. gpustack_runner-0.1.25/gpustack_runner/_version_appendix.py +1 -0
  5. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/runner.py.json +88 -0
  6. gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/cuda/Dockerfile +77 -0
  7. gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/matrix.yaml +22 -0
  8. gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/cuda/Dockerfile +17 -0
  9. gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/matrix.yaml +56 -0
  10. gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/rocm/Dockerfile +17 -0
  11. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/README.md +2 -0
  12. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/Dockerfile +1 -1
  13. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cuda/Dockerfile +12 -6
  14. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/Dockerfile +99 -4
  15. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +88 -0
  16. gpustack_runner-0.1.24.post4/gpustack_runner/_version_appendix.py +0 -1
  17. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.codespelldict +0 -0
  18. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.codespellrc +0 -0
  19. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.gitattributes +0 -0
  20. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.gitignore +0 -0
  21. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.pre-commit-config.yaml +0 -0
  22. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/.python-version +0 -0
  23. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/LICENSE +0 -0
  24. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/Makefile +0 -0
  25. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/docs/index.md +0 -0
  26. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/docs/modules/gpustack_runner.md +0 -0
  27. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/__init__.py +0 -0
  28. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/__main__.py +0 -0
  29. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/__utils__.py +0 -0
  30. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/_version.pyi +0 -0
  31. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__init__.py +0 -0
  32. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__types__.py +0 -0
  33. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/cmds/images.py +0 -0
  34. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/envs.py +0 -0
  35. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/gpustack_runner/runner.py +0 -0
  36. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/hatch.toml +0 -0
  37. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/mkdocs.yml +0 -0
  38. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
  39. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
  40. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
  41. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
  42. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
  43. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
  44. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
  45. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
  46. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
  47. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
  48. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
  49. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
  50. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
  51. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  52. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
  53. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
  54. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
  55. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
  56. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
  57. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
  58. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
  59. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
  60. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
  61. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  62. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
  63. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
  64. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
  65. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
  66. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
  67. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
  68. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
  69. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
  70. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
  71. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
  72. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
  73. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
  74. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
  75. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
  76. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
  77. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
  78. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
  79. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
  80. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
  81. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
  82. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
  83. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
  84. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
  85. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
  86. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
  87. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
  88. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
  89. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
  90. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
  91. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
  92. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
  93. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +0 -0
  94. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +0 -0
  95. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +0 -0
  96. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +0 -0
  97. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/cann/Dockerfile +0 -0
  98. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/matrix.yaml +0 -0
  99. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/Dockerfile +0 -0
  100. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/patches/vllm_001_wrong_dp_ray.patch +0 -0
  101. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/matrix.yaml +0 -0
  102. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/Dockerfile +0 -0
  103. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/patches/vllm_001_wrong_dp_ray.patch +0 -0
  104. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  105. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  106. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/patches/mindie.zip +0 -0
  107. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/patches/vllm/001_wrong_dp_ray.patch +0 -0
  108. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cann/patches/vllm_omni/001_wrong_patch.patch +0 -0
  109. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/corex/Dockerfile +0 -0
  110. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cuda/patches/vllm/001_wrong_dp_ray.patch +0 -0
  111. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/cuda/patches/vllm_omni/001_wrong_patch.patch +0 -0
  112. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/discard_runner.sh +0 -0
  113. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/dtk/Dockerfile +0 -0
  114. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/expand_matrix.sh +0 -0
  115. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/hggc/Dockerfile +0 -0
  116. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/maca/Dockerfile +0 -0
  117. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/matrix.yaml +0 -0
  118. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/merge_runner.sh +0 -0
  119. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/musa/Dockerfile +0 -0
  120. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/prune_runner.sh +0 -0
  121. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/patches/sglang/001_wrong_vram.patch +0 -0
  122. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/patches/vllm/001_wrong_dp_ray.patch +0 -0
  123. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/rocm/patches/vllm_omni/001_wrong_patch.patch +0 -0
  124. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/squash_expand_matrix.sh +0 -0
  125. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pack/squash_image.sh +0 -0
  126. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pyproject.toml +0 -0
  127. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/pytest.ini +0 -0
  128. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/ruff.toml +0 -0
  129. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/__init__.py +0 -0
  130. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
  131. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
  132. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
  133. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
  134. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_merge_image.json +0 -0
  135. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_replace_image_with.json +0 -0
  136. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_split_image.json +0 -0
  137. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_runner.py +0 -0
  138. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_utils.py +0 -0
  139. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/activate +0 -0
  140. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat.sh +0 -0
  141. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_current_date_time.sh +0 -0
  142. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_get_temperature.sh +0 -0
  143. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_get_weather.sh +0 -0
  144. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_square_of_number.sh +0 -0
  145. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_square_root_of_number.sh +0 -0
  146. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/chat_tool_where_am_i.sh +0 -0
  147. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/run_runner.sh +0 -0
  148. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/tools/run_runner_cluster.sh +0 -0
  149. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/uv.lock +0 -0
  150. {gpustack_runner-0.1.24.post4 → gpustack_runner-0.1.25}/uv.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runner
3
- Version: 0.1.24.post4
3
+ Version: 0.1.25
4
4
  Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runner
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -52,17 +52,17 @@ The following table lists the supported accelerated backends and their correspon
52
52
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
53
53
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
54
54
 
55
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
56
- |------------------------------|-----------|--------------------------------------------------------------------|------------------------|
57
- | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
58
- | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
59
- | 8.5 (310P) | `2.3.0` | `0.14.1` | |
60
- | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
61
- | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
62
- | 8.3 (310P) | `2.2.rc1` | | |
63
- | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
64
- | 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
65
- | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
55
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
56
+ |------------------------------|-----------|-----------------------------------|------------------------|
57
+ | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
58
+ | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
59
+ | 8.5 (310P) | `2.3.0` | `0.14.1` | |
60
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
61
+ | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
62
+ | 8.3 (310P) | `2.2.rc1` | | |
63
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
64
+ | 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
65
+ | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
66
66
 
67
67
  ### Iluvatar CoreX
68
68
 
@@ -80,11 +80,11 @@ The following table lists the supported accelerated backends and their correspon
80
80
  > - CUDA 12.6/12.4 supports Compute Capabilities:
81
81
  `7.5 8.0+PTX 8.9 9.0+PTX`.
82
82
 
83
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
84
- |------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
85
- | 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
86
- | 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
87
- | 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
83
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
84
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
85
+ | 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
86
+ | 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
87
+ | 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
88
88
 
89
89
  ### Hygon DTK
90
90
 
@@ -128,10 +128,10 @@ The following table lists the supported accelerated backends and their correspon
128
128
  > - ROCm 6.4 SGLang supports `gfx942` only.
129
129
  > - ROCm 7.0 SGLang supports `gfx950` only.
130
130
 
131
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
- |------------------------------|-----------------------------------------------------------|-----------------------------------------------------|
133
- | 7.0 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
134
- | 6.4 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
131
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
133
+ | 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
134
+ | 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
135
135
 
136
136
  ## Directory Structure
137
137
 
@@ -32,17 +32,17 @@ The following table lists the supported accelerated backends and their correspon
32
32
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
33
33
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
34
34
 
35
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
36
- |------------------------------|-----------|--------------------------------------------------------------------|------------------------|
37
- | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
38
- | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
39
- | 8.5 (310P) | `2.3.0` | `0.14.1` | |
40
- | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
41
- | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
42
- | 8.3 (310P) | `2.2.rc1` | | |
43
- | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
44
- | 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
45
- | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
35
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
36
+ |------------------------------|-----------|-----------------------------------|------------------------|
37
+ | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
38
+ | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
39
+ | 8.5 (310P) | `2.3.0` | `0.14.1` | |
40
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
41
+ | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
42
+ | 8.3 (310P) | `2.2.rc1` | | |
43
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
44
+ | 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
45
+ | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
46
46
 
47
47
  ### Iluvatar CoreX
48
48
 
@@ -60,11 +60,11 @@ The following table lists the supported accelerated backends and their correspon
60
60
  > - CUDA 12.6/12.4 supports Compute Capabilities:
61
61
  `7.5 8.0+PTX 8.9 9.0+PTX`.
62
62
 
63
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
64
- |------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
65
- | 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
66
- | 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
67
- | 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
63
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
64
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
65
+ | 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
66
+ | 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
67
+ | 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
68
68
 
69
69
  ### Hygon DTK
70
70
 
@@ -108,10 +108,10 @@ The following table lists the supported accelerated backends and their correspon
108
108
  > - ROCm 6.4 SGLang supports `gfx942` only.
109
109
  > - ROCm 7.0 SGLang supports `gfx950` only.
110
110
 
111
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
112
- |------------------------------|-----------------------------------------------------------|-----------------------------------------------------|
113
- | 7.0 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
114
- | 6.4 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
111
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
112
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
113
+ | 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
114
+ | 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
115
115
 
116
116
  ## Directory Structure
117
117
 
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.24.post4'
31
- __version_tuple__ = version_tuple = (0, 1, 24, 'post4')
30
+ __version__ = version = '0.1.25'
31
+ __version_tuple__ = version_tuple = (0, 1, 25)
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -0,0 +1 @@
1
+ git_commit = "b005327"
@@ -868,6 +868,28 @@
868
868
  "docker_image": "gpustack/runner:cuda12.9-sglang0.5.6.post2",
869
869
  "deprecated": false
870
870
  },
871
+ {
872
+ "backend": "cuda",
873
+ "backend_version": "12.9",
874
+ "original_backend_version": "12.9.1",
875
+ "backend_variant": "",
876
+ "service": "vllm",
877
+ "service_version": "0.15.0",
878
+ "platform": "linux/amd64",
879
+ "docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
880
+ "deprecated": false
881
+ },
882
+ {
883
+ "backend": "cuda",
884
+ "backend_version": "12.9",
885
+ "original_backend_version": "12.9.1",
886
+ "backend_variant": "",
887
+ "service": "vllm",
888
+ "service_version": "0.15.0",
889
+ "platform": "linux/arm64",
890
+ "docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
891
+ "deprecated": false
892
+ },
871
893
  {
872
894
  "backend": "cuda",
873
895
  "backend_version": "12.9",
@@ -1077,6 +1099,28 @@
1077
1099
  "docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
1078
1100
  "deprecated": true
1079
1101
  },
1102
+ {
1103
+ "backend": "cuda",
1104
+ "backend_version": "12.8",
1105
+ "original_backend_version": "12.8.1",
1106
+ "backend_variant": "",
1107
+ "service": "vllm",
1108
+ "service_version": "0.15.0",
1109
+ "platform": "linux/amd64",
1110
+ "docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
1111
+ "deprecated": false
1112
+ },
1113
+ {
1114
+ "backend": "cuda",
1115
+ "backend_version": "12.8",
1116
+ "original_backend_version": "12.8.1",
1117
+ "backend_variant": "",
1118
+ "service": "vllm",
1119
+ "service_version": "0.15.0",
1120
+ "platform": "linux/arm64",
1121
+ "docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
1122
+ "deprecated": false
1123
+ },
1080
1124
  {
1081
1125
  "backend": "cuda",
1082
1126
  "backend_version": "12.8",
@@ -1297,6 +1341,28 @@
1297
1341
  "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
1298
1342
  "deprecated": true
1299
1343
  },
1344
+ {
1345
+ "backend": "cuda",
1346
+ "backend_version": "12.6",
1347
+ "original_backend_version": "12.6.3",
1348
+ "backend_variant": "",
1349
+ "service": "vllm",
1350
+ "service_version": "0.15.0",
1351
+ "platform": "linux/amd64",
1352
+ "docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
1353
+ "deprecated": false
1354
+ },
1355
+ {
1356
+ "backend": "cuda",
1357
+ "backend_version": "12.6",
1358
+ "original_backend_version": "12.6.3",
1359
+ "backend_variant": "",
1360
+ "service": "vllm",
1361
+ "service_version": "0.15.0",
1362
+ "platform": "linux/arm64",
1363
+ "docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
1364
+ "deprecated": false
1365
+ },
1300
1366
  {
1301
1367
  "backend": "cuda",
1302
1368
  "backend_version": "12.6",
@@ -1781,6 +1847,17 @@
1781
1847
  "docker_image": "gpustack/runner:rocm7.0-sglang0.5.6.post2",
1782
1848
  "deprecated": false
1783
1849
  },
1850
+ {
1851
+ "backend": "rocm",
1852
+ "backend_version": "7.0",
1853
+ "original_backend_version": "7.0.2",
1854
+ "backend_variant": "",
1855
+ "service": "vllm",
1856
+ "service_version": "0.15.0",
1857
+ "platform": "linux/amd64",
1858
+ "docker_image": "gpustack/runner:rocm7.0-vllm0.15.0",
1859
+ "deprecated": false
1860
+ },
1784
1861
  {
1785
1862
  "backend": "rocm",
1786
1863
  "backend_version": "7.0",
@@ -1880,6 +1957,17 @@
1880
1957
  "docker_image": "gpustack/runner:rocm6.4-sglang0.5.5.post3",
1881
1958
  "deprecated": false
1882
1959
  },
1960
+ {
1961
+ "backend": "rocm",
1962
+ "backend_version": "6.4",
1963
+ "original_backend_version": "6.4.4",
1964
+ "backend_variant": "",
1965
+ "service": "vllm",
1966
+ "service_version": "0.15.0",
1967
+ "platform": "linux/amd64",
1968
+ "docker_image": "gpustack/runner:rocm6.4-vllm0.15.0",
1969
+ "deprecated": false
1970
+ },
1883
1971
  {
1884
1972
  "backend": "rocm",
1885
1973
  "backend_version": "6.4",
@@ -0,0 +1,77 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CUDA_VERSION=12.8
3
+ ARG VLLM_VERSION=0.14.1
4
+ ARG SGLANG_VERSION=0.5.8
5
+
6
+ FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
7
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
8
+
9
+ ARG TARGETPLATFORM
10
+ ARG TARGETOS
11
+ ARG TARGETARCH
12
+
13
+ ## Update CuDNN and NCCL packages
14
+
15
+ RUN <<EOF
16
+ # Update CuDNN and NCCL packages
17
+
18
+ IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
19
+
20
+ # Install
21
+ cat <<EOT >/tmp/requirements.txt
22
+ nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
23
+ nvidia-cudnn-frontend>=1.17.0
24
+ nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
25
+ EOT
26
+ uv pip install \
27
+ -r /tmp/requirements.txt
28
+
29
+ # Review
30
+ uv pip tree
31
+
32
+ # Cleanup
33
+ rm -rf /var/tmp/* \
34
+ && rm -rf /tmp/*
35
+ EOF
36
+
37
+ ## Entrypoint
38
+
39
+ WORKDIR /
40
+ ENTRYPOINT [ "tini", "--" ]
41
+
42
+
43
+ FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
44
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
45
+
46
+ ARG TARGETPLATFORM
47
+ ARG TARGETOS
48
+ ARG TARGETARCH
49
+
50
+ ## Update CuDNN and NCCL packages
51
+
52
+ RUN <<EOF
53
+ # Update CuDNN and NCCL packages
54
+
55
+ IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
56
+
57
+ # Install
58
+ cat <<EOT >/tmp/requirements.txt
59
+ nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
60
+ nvidia-cudnn-frontend>=1.17.0
61
+ nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
62
+ EOT
63
+ uv pip install \
64
+ -r /tmp/requirements.txt
65
+
66
+ # Review
67
+ uv pip tree
68
+
69
+ # Cleanup
70
+ rm -rf /var/tmp/* \
71
+ && rm -rf /tmp/*
72
+ EOF
73
+
74
+ ## Entrypoint
75
+
76
+ WORKDIR /
77
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,22 @@
1
+ rules:
2
+
3
+ #
4
+ # NVIDIA CUDA
5
+ #
6
+
7
+ ## Packed NVIDIA CUDA 12.9.
8
+ ##
9
+ - backend: "cuda"
10
+ services:
11
+ - "vllm"
12
+ args:
13
+ - "CUDA_VERSION=12.9"
14
+ - "VLLM_VERSION=0.15.0"
15
+ - backend: "cuda"
16
+ services:
17
+ - "vllm"
18
+ - "sglang"
19
+ args:
20
+ - "CUDA_VERSION=12.9"
21
+ - "VLLM_VERSION=0.14.1"
22
+ - "SGLANG_VERSION=0.5.8"
@@ -0,0 +1,17 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CUDA_VERSION=12.8
3
+ ARG SGLANG_VERSION=0.5.8
4
+
5
+ FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
6
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
7
+
8
+ ARG TARGETPLATFORM
9
+ ARG TARGETOS
10
+ ARG TARGETARCH
11
+
12
+ ## Entrypoint
13
+
14
+ ENV SGLANG_DISABLE_CUDNN_CHECK=1
15
+
16
+ WORKDIR /
17
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,56 @@
1
+ rules:
2
+
3
+ #
4
+ # NVIDIA CUDA
5
+ #
6
+
7
+ ## Packed NVIDIA CUDA 12.9.
8
+ ##
9
+ - backend: "cuda"
10
+ services:
11
+ - "sglang"
12
+ args:
13
+ - "CUDA_VERSION=12.9"
14
+ - "VLLM_VERSION=0.14.1"
15
+ - "SGLANG_VERSION=0.5.8"
16
+
17
+ #
18
+ # AMD ROCm
19
+ #
20
+
21
+ ## Packed ROCm 7.0.
22
+ ##
23
+ - backend: "rocm"
24
+ services:
25
+ - "sglang"
26
+ platforms:
27
+ - "linux/amd64"
28
+ args:
29
+ - "ROCM_VERSION=7.0"
30
+ - "SGLANG_VERSION=0.5.8"
31
+ - backend: "rocm"
32
+ services:
33
+ - "sglang"
34
+ platforms:
35
+ - "linux/amd64"
36
+ args:
37
+ - "ROCM_VERSION=7.0"
38
+ - "SGLANG_VERSION=0.5.7"
39
+ ## Packed ROCm 6.4.
40
+ ##
41
+ - backend: "rocm"
42
+ services:
43
+ - "sglang"
44
+ platforms:
45
+ - "linux/amd64"
46
+ args:
47
+ - "ROCM_VERSION=6.4"
48
+ - "SGLANG_VERSION=0.5.8"
49
+ - backend: "rocm"
50
+ services:
51
+ - "sglang"
52
+ platforms:
53
+ - "linux/amd64"
54
+ args:
55
+ - "ROCM_VERSION=6.4"
56
+ - "SGLANG_VERSION=0.5.7"
@@ -0,0 +1,17 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG ROCM_VERSION=7.0
3
+ ARG SGLANG_VERSION=0.5.8
4
+
5
+ FROM gpustack/runner:rocm${ROCM_VERSION}-sglang${SGLANG_VERSION} AS sglang
6
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
7
+
8
+ ARG TARGETPLATFORM
9
+ ARG TARGETOS
10
+ ARG TARGETARCH
11
+
12
+ ## Entrypoint
13
+
14
+ ENV SGLANG_DISABLE_CUDNN_CHECK=1
15
+
16
+ WORKDIR /
17
+ ENTRYPOINT [ "tini", "--" ]
@@ -35,3 +35,5 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
35
35
  - [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.
36
36
  - [x] 2026-01-29: Apply DP deployment patches to vLLM 0.13.0 for CUDA/ROCm released images.
37
37
  - [x] 2026-01-29: Reinstall SGLang Kernel for SGLang 0.5.7 of CANN released images.
38
+ - [x] 2026-02-03: Apply several patches to vLLM 0.14.1/0.15.0 and SGLang 0.5.8 for CUDA 12.9 released images.
39
+ - [x] 2026-02-03: Patch SGLang 0.5.8/0.5.7 of CUDA/ROCm released images to disable CuDNN version check.
@@ -59,7 +59,7 @@ ARG VLLM_VERSION=0.14.1
59
59
  ARG VLLM_ASCEND_VERSION=0.14.0rc1
60
60
  ARG VLLM_TORCH_VERSION=2.9.0
61
61
  ARG VLLM_MOONCAKE_VERSION=0.3.7.post2
62
- ARG VLLM_OMNI_COMMIT=b11d436
62
+ ARG VLLM_OMNI_COMMIT=de2cac9
63
63
  ARG SGLANG_BASE_IMAGE=gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-python${PYTHON_VERSION}
64
64
  ARG SGLANG_VERSION=0.5.8
65
65
  ARG SGLANG_TORCH_VERSION=2.8.0
@@ -101,7 +101,7 @@ ARG VOXBOX_VERSION=0.0.21
101
101
  ARG VOXBOX_TORCH_VERSION=2.7.1
102
102
  ARG VOXBOX_TORCH_CUDA_VERSION=${CUDA_VERSION}
103
103
  ARG VLLM_BASE_IMAGE=gpustack/runner:cuda${CUDA_VERSION}-python${PYTHON_VERSION}
104
- ARG VLLM_VERSION=0.14.1
104
+ ARG VLLM_VERSION=0.15.0
105
105
  ARG VLLM_TORCH_VERSION=2.9.1
106
106
  ARG VLLM_TORCH_CUDA_VERSION=${CUDA_VERSION}
107
107
  ARG VLLM_BUILD_BASE_IMAGE=gpustack/runner:cuda${VLLM_TORCH_CUDA_VERSION}-python${PYTHON_VERSION}
@@ -111,12 +111,12 @@ ARG VLLM_NVIDIA_NVSHMEM_VERSION=3.4.5
111
111
  ARG VLLM_AWS_EFA_VERSION=1.44.0
112
112
  ARG VLLM_PPLX_KERNEL_COMMIT=12cecfda
113
113
  ARG VLLM_DEEPEP_COMMIT=b57e5e21
114
- ARG VLLM_DEEPGEMM_COMMIT=9b680f42
114
+ ARG VLLM_DEEPGEMM_COMMIT=0f5f266
115
115
  ARG VLLM_FLASHINFER_VERSION=0.6.1
116
116
  ARG VLLM_FLASHATTENTION_VERSION=2.8.3
117
117
  ARG VLLM_LMCACHE_VERSION=0.3.12
118
118
  ARG VLLM_MOONCAKE_VERSION=0.3.8.post1
119
- ARG VLLM_OMNI_COMMIT=2c294a7
119
+ ARG VLLM_OMNI_COMMIT=d6f93b0
120
120
  ARG SGLANG_BASE_IMAGE=vllm
121
121
  ARG SGLANG_VERSION=0.5.8
122
122
  ARG SGLANG_BUILD_BASE_IMAGE=vllm-build
@@ -844,7 +844,8 @@ RUN <<EOF
844
844
  git -C /tmp clone --recursive --shallow-submodules \
845
845
  https://github.com/deepseek-ai/DeepEP.git deep_ep \
846
846
  && pushd /tmp/deep_ep \
847
- && git checkout ${VLLM_DEEPEP_COMMIT}
847
+ && git checkout ${VLLM_DEEPEP_COMMIT} \
848
+ && git submodule update --init --recursive
848
849
 
849
850
  # Build
850
851
  CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
@@ -914,7 +915,8 @@ RUN <<EOF
914
915
  git -C /tmp clone --recursive --shallow-submodules \
915
916
  https://github.com/ppl-ai/pplx-kernels.git pplx-kernels \
916
917
  && pushd /tmp/pplx-kernels \
917
- && git checkout ${VLLM_PPLX_KERNEL_COMMIT}
918
+ && git checkout ${VLLM_PPLX_KERNEL_COMMIT} \
919
+ && git submodule update --init --recursive
918
920
 
919
921
  # Build
920
922
  CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
@@ -1315,6 +1317,9 @@ cuda-python==${CUDA_MAJOR}.${CUDA_MINOR}
1315
1317
  pynvml==${CUDA_MAJOR}
1316
1318
  nvidia-nvshmem-cu${CUDA_MAJOR}
1317
1319
  nvshmem4py-cu${CUDA_MAJOR}
1320
+ nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
1321
+ nvidia-cudnn-frontend>=1.17.0
1322
+ nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
1318
1323
  EOT
1319
1324
  uv pip install \
1320
1325
  -r /tmp/requirements.txt
@@ -1811,7 +1816,8 @@ EOF
1811
1816
 
1812
1817
  ## Entrypoint
1813
1818
 
1814
- ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 \
1819
+ ENV SGLANG_DISABLE_CUDNN_CHECK=1 \
1820
+ SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 \
1815
1821
  SGLANG_INT4_WEIGHT=0 \
1816
1822
  SGLANG_MOE_PADDING=1 \
1817
1823
  SGLANG_SET_CPU_AFFINITY=1