gpustack-runner 0.1.24.post3__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/PKG-INFO +21 -21
  2. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/README.md +20 -20
  3. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/_version.py +2 -2
  4. gpustack_runner-0.1.25/gpustack_runner/_version_appendix.py +1 -0
  5. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/runner.py.json +132 -0
  6. gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/cuda/Dockerfile +77 -0
  7. gpustack_runner-0.1.25/pack/.post_operation/20260203_cuda_several_patches/matrix.yaml +22 -0
  8. gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/cuda/Dockerfile +17 -0
  9. gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/matrix.yaml +56 -0
  10. gpustack_runner-0.1.25/pack/.post_operation/20260203_sglang_disable_cudnn_check/rocm/Dockerfile +17 -0
  11. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/README.md +2 -0
  12. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/Dockerfile +15 -2
  13. gpustack_runner-0.1.25/pack/cann/patches/vllm_omni/001_wrong_patch.patch +13 -0
  14. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cuda/Dockerfile +25 -7
  15. gpustack_runner-0.1.25/pack/cuda/patches/vllm_omni/001_wrong_patch.patch +13 -0
  16. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/rocm/Dockerfile +119 -6
  17. gpustack_runner-0.1.25/pack/rocm/patches/vllm_omni/001_wrong_patch.patch +13 -0
  18. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +132 -0
  19. gpustack_runner-0.1.24.post3/gpustack_runner/_version_appendix.py +0 -1
  20. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.codespelldict +0 -0
  21. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.codespellrc +0 -0
  22. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.gitattributes +0 -0
  23. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.gitignore +0 -0
  24. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.pre-commit-config.yaml +0 -0
  25. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/.python-version +0 -0
  26. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/LICENSE +0 -0
  27. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/Makefile +0 -0
  28. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/docs/index.md +0 -0
  29. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/docs/modules/gpustack_runner.md +0 -0
  30. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/__init__.py +0 -0
  31. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/__main__.py +0 -0
  32. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/__utils__.py +0 -0
  33. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/_version.pyi +0 -0
  34. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__init__.py +0 -0
  35. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/cmds/__types__.py +0 -0
  36. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/cmds/images.py +0 -0
  37. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/envs.py +0 -0
  38. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/gpustack_runner/runner.py +0 -0
  39. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/hatch.toml +0 -0
  40. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/mkdocs.yml +0 -0
  41. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
  42. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
  43. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
  44. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
  45. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
  46. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
  47. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
  48. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
  49. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
  50. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
  51. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
  52. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
  53. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
  54. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  55. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
  56. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
  57. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
  58. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
  59. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
  60. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
  61. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
  62. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
  63. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
  64. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  65. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
  66. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
  67. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
  68. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
  69. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
  70. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
  71. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
  72. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
  73. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
  74. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
  75. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
  76. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
  77. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
  78. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
  79. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
  80. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
  81. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
  82. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
  83. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
  84. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
  85. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
  86. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
  87. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
  88. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
  89. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
  90. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
  91. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
  92. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
  93. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
  94. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
  95. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
  96. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +0 -0
  97. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +0 -0
  98. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +0 -0
  99. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +0 -0
  100. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/cann/Dockerfile +0 -0
  101. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_sglang_reinstall_kernel/matrix.yaml +0 -0
  102. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/Dockerfile +0 -0
  103. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/cuda/patches/vllm_001_wrong_dp_ray.patch +0 -0
  104. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/matrix.yaml +0 -0
  105. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/Dockerfile +0 -0
  106. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/.post_operation/20260129_vllm_patch_dp/rocm/patches/vllm_001_wrong_dp_ray.patch +0 -0
  107. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  108. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/mindie-atb-models_2.3.0_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  109. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/cann/patches/mindie.zip +0 -0
  110. /gpustack_runner-0.1.24.post3/pack/cann/patches/vllm_001_wrong_dp_ray.patch → /gpustack_runner-0.1.25/pack/cann/patches/vllm/001_wrong_dp_ray.patch +0 -0
  111. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/corex/Dockerfile +0 -0
  112. /gpustack_runner-0.1.24.post3/pack/cuda/patches/vllm_001_wrong_dp_ray.patch → /gpustack_runner-0.1.25/pack/cuda/patches/vllm/001_wrong_dp_ray.patch +0 -0
  113. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/discard_runner.sh +0 -0
  114. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/dtk/Dockerfile +0 -0
  115. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/expand_matrix.sh +0 -0
  116. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/hggc/Dockerfile +0 -0
  117. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/maca/Dockerfile +0 -0
  118. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/matrix.yaml +0 -0
  119. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/merge_runner.sh +0 -0
  120. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/musa/Dockerfile +0 -0
  121. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/prune_runner.sh +0 -0
  122. /gpustack_runner-0.1.24.post3/pack/rocm/patches/sglang_001_wrong_vram.patch → /gpustack_runner-0.1.25/pack/rocm/patches/sglang/001_wrong_vram.patch +0 -0
  123. /gpustack_runner-0.1.24.post3/pack/rocm/patches/vllm_001_wrong_dp_ray.patch → /gpustack_runner-0.1.25/pack/rocm/patches/vllm/001_wrong_dp_ray.patch +0 -0
  124. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/squash_expand_matrix.sh +0 -0
  125. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pack/squash_image.sh +0 -0
  126. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pyproject.toml +0 -0
  127. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/pytest.ini +0 -0
  128. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/ruff.toml +0 -0
  129. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/__init__.py +0 -0
  130. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
  131. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
  132. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
  133. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
  134. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_merge_image.json +0 -0
  135. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_replace_image_with.json +0 -0
  136. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/fixtures/test_split_image.json +0 -0
  137. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_runner.py +0 -0
  138. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tests/gpustack_runner/test_utils.py +0 -0
  139. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/activate +0 -0
  140. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat.sh +0 -0
  141. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_current_date_time.sh +0 -0
  142. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_get_temperature.sh +0 -0
  143. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_get_weather.sh +0 -0
  144. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_square_of_number.sh +0 -0
  145. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_square_root_of_number.sh +0 -0
  146. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/chat_tool_where_am_i.sh +0 -0
  147. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/run_runner.sh +0 -0
  148. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/tools/run_runner_cluster.sh +0 -0
  149. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/uv.lock +0 -0
  150. {gpustack_runner-0.1.24.post3 → gpustack_runner-0.1.25}/uv.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runner
3
- Version: 0.1.24.post3
3
+ Version: 0.1.25
4
4
  Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runner
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -52,17 +52,17 @@ The following table lists the supported accelerated backends and their correspon
52
52
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
53
53
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
54
54
 
55
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
56
- |------------------------------|-----------|--------------------------------------------------------------------|------------------------|
57
- | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
58
- | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
59
- | 8.5 (310P) | `2.3.0` | `0.14.1` | |
60
- | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
61
- | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
62
- | 8.3 (310P) | `2.2.rc1` | | |
63
- | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
64
- | 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
65
- | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
55
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
56
+ |------------------------------|-----------|-----------------------------------|------------------------|
57
+ | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
58
+ | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
59
+ | 8.5 (310P) | `2.3.0` | `0.14.1` | |
60
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
61
+ | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
62
+ | 8.3 (310P) | `2.2.rc1` | | |
63
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
64
+ | 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
65
+ | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
66
66
 
67
67
  ### Iluvatar CoreX
68
68
 
@@ -80,11 +80,11 @@ The following table lists the supported accelerated backends and their correspon
80
80
  > - CUDA 12.6/12.4 supports Compute Capabilities:
81
81
  `7.5 8.0+PTX 8.9 9.0+PTX`.
82
82
 
83
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
84
- |------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
85
- | 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
86
- | 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
87
- | 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
83
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
84
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
85
+ | 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
86
+ | 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
87
+ | 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
88
88
 
89
89
  ### Hygon DTK
90
90
 
@@ -128,10 +128,10 @@ The following table lists the supported accelerated backends and their correspon
128
128
  > - ROCm 6.4 SGLang supports `gfx942` only.
129
129
  > - ROCm 7.0 SGLang supports `gfx950` only.
130
130
 
131
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
- |------------------------------|-------------------------------------------------|--------------------------------------------|
133
- | 7.0 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
134
- | 6.4 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
131
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
133
+ | 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
134
+ | 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
135
135
 
136
136
  ## Directory Structure
137
137
 
@@ -32,17 +32,17 @@ The following table lists the supported accelerated backends and their correspon
32
32
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
33
33
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
34
34
 
35
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
36
- |------------------------------|-----------|--------------------------------------------------------------------|------------------------|
37
- | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
38
- | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
39
- | 8.5 (310P) | `2.3.0` | `0.14.1` | |
40
- | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
41
- | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
42
- | 8.3 (310P) | `2.2.rc1` | | |
43
- | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~ | `0.5.2`, `0.5.1.post3` |
44
- | 8.2 (910B) | `2.1.rc2` | `0.10.2`, ~~`0.10.1.1`~~, <br/>`0.10.0`, `0.9.2`, <br/>~~`0.9.1`~~ | `0.5.2`, `0.5.1.post3` |
45
- | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
35
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
36
+ |------------------------------|-----------|-----------------------------------|------------------------|
37
+ | 8.5 (A3/910C) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
38
+ | 8.5 (910B) | `2.3.0` | `0.14.1`, `0.13.0` | `0.5.8` |
39
+ | 8.5 (310P) | `2.3.0` | `0.14.1` | |
40
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
41
+ | 8.3 (910B) | `2.2.rc1` | `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
42
+ | 8.3 (310P) | `2.2.rc1` | | |
43
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2` | `0.5.2`, `0.5.1.post3` |
44
+ | 8.2 (910B) | `2.1.rc2` | `0.10.2`, `0.10.0`, <br/>`0.9.2` | `0.5.2`, `0.5.1.post3` |
45
+ | 8.2 (310P) | `2.1.rc2` | `0.10.0`, `0.9.2` | |
46
46
 
47
47
  ### Iluvatar CoreX
48
48
 
@@ -60,11 +60,11 @@ The following table lists the supported accelerated backends and their correspon
60
60
  > - CUDA 12.6/12.4 supports Compute Capabilities:
61
61
  `7.5 8.0+PTX 8.9 9.0+PTX`.
62
62
 
63
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
64
- |------------------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------|----------|
65
- | 12.9 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
66
- | 12.8 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.4.post3`~~ | `0.0.21` |
67
- | 12.6 | `0.14.1`, **`0.13.0`**, <br/>`0.12.0`, `0.11.2`, <br/>`0.10.2` | | `0.0.21` |
63
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
64
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|----------|
65
+ | 12.9 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` | |
66
+ | 12.8 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` | `0.0.21` |
67
+ | 12.6 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | | `0.0.21` |
68
68
 
69
69
  ### Hygon DTK
70
70
 
@@ -108,10 +108,10 @@ The following table lists the supported accelerated backends and their correspon
108
108
  > - ROCm 6.4 SGLang supports `gfx942` only.
109
109
  > - ROCm 7.0 SGLang supports `gfx950` only.
110
110
 
111
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
112
- |------------------------------|-------------------------------------------------|--------------------------------------------|
113
- | 7.0 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` |
114
- | 6.4 | **`0.13.0`**, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, <br/>`0.5.5.post3` |
111
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
112
+ |------------------------------|----------------------------------------------------------------------|-----------------------------------------------------|
113
+ | 7.0 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2` |
114
+ | 6.4 | `0.15.0`, `0.14.1`, <br/>`0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.8`, `0.5.7`, <br/>`0.5.6.post2`, `0.5.5.post3` |
115
115
 
116
116
  ## Directory Structure
117
117
 
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.24.post3'
31
- __version_tuple__ = version_tuple = (0, 1, 24, 'post3')
30
+ __version__ = version = '0.1.25'
31
+ __version_tuple__ = version_tuple = (0, 1, 25)
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -0,0 +1 @@
1
+ git_commit = "b005327"
@@ -868,6 +868,28 @@
868
868
  "docker_image": "gpustack/runner:cuda12.9-sglang0.5.6.post2",
869
869
  "deprecated": false
870
870
  },
871
+ {
872
+ "backend": "cuda",
873
+ "backend_version": "12.9",
874
+ "original_backend_version": "12.9.1",
875
+ "backend_variant": "",
876
+ "service": "vllm",
877
+ "service_version": "0.15.0",
878
+ "platform": "linux/amd64",
879
+ "docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
880
+ "deprecated": false
881
+ },
882
+ {
883
+ "backend": "cuda",
884
+ "backend_version": "12.9",
885
+ "original_backend_version": "12.9.1",
886
+ "backend_variant": "",
887
+ "service": "vllm",
888
+ "service_version": "0.15.0",
889
+ "platform": "linux/arm64",
890
+ "docker_image": "gpustack/runner:cuda12.9-vllm0.15.0",
891
+ "deprecated": false
892
+ },
871
893
  {
872
894
  "backend": "cuda",
873
895
  "backend_version": "12.9",
@@ -1077,6 +1099,28 @@
1077
1099
  "docker_image": "gpustack/runner:cuda12.8-sglang0.5.4.post3",
1078
1100
  "deprecated": true
1079
1101
  },
1102
+ {
1103
+ "backend": "cuda",
1104
+ "backend_version": "12.8",
1105
+ "original_backend_version": "12.8.1",
1106
+ "backend_variant": "",
1107
+ "service": "vllm",
1108
+ "service_version": "0.15.0",
1109
+ "platform": "linux/amd64",
1110
+ "docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
1111
+ "deprecated": false
1112
+ },
1113
+ {
1114
+ "backend": "cuda",
1115
+ "backend_version": "12.8",
1116
+ "original_backend_version": "12.8.1",
1117
+ "backend_variant": "",
1118
+ "service": "vllm",
1119
+ "service_version": "0.15.0",
1120
+ "platform": "linux/arm64",
1121
+ "docker_image": "gpustack/runner:cuda12.8-vllm0.15.0",
1122
+ "deprecated": false
1123
+ },
1080
1124
  {
1081
1125
  "backend": "cuda",
1082
1126
  "backend_version": "12.8",
@@ -1297,6 +1341,28 @@
1297
1341
  "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
1298
1342
  "deprecated": true
1299
1343
  },
1344
+ {
1345
+ "backend": "cuda",
1346
+ "backend_version": "12.6",
1347
+ "original_backend_version": "12.6.3",
1348
+ "backend_variant": "",
1349
+ "service": "vllm",
1350
+ "service_version": "0.15.0",
1351
+ "platform": "linux/amd64",
1352
+ "docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
1353
+ "deprecated": false
1354
+ },
1355
+ {
1356
+ "backend": "cuda",
1357
+ "backend_version": "12.6",
1358
+ "original_backend_version": "12.6.3",
1359
+ "backend_variant": "",
1360
+ "service": "vllm",
1361
+ "service_version": "0.15.0",
1362
+ "platform": "linux/arm64",
1363
+ "docker_image": "gpustack/runner:cuda12.6-vllm0.15.0",
1364
+ "deprecated": false
1365
+ },
1300
1366
  {
1301
1367
  "backend": "cuda",
1302
1368
  "backend_version": "12.6",
@@ -1748,6 +1814,17 @@
1748
1814
  "docker_image": "gpustack/runner:musa4.1-vllm0.9.2",
1749
1815
  "deprecated": false
1750
1816
  },
1817
+ {
1818
+ "backend": "rocm",
1819
+ "backend_version": "7.0",
1820
+ "original_backend_version": "7.0.2",
1821
+ "backend_variant": "",
1822
+ "service": "sglang",
1823
+ "service_version": "0.5.8",
1824
+ "platform": "linux/amd64",
1825
+ "docker_image": "gpustack/runner:rocm7.0-sglang0.5.8",
1826
+ "deprecated": false
1827
+ },
1751
1828
  {
1752
1829
  "backend": "rocm",
1753
1830
  "backend_version": "7.0",
@@ -1770,6 +1847,28 @@
1770
1847
  "docker_image": "gpustack/runner:rocm7.0-sglang0.5.6.post2",
1771
1848
  "deprecated": false
1772
1849
  },
1850
+ {
1851
+ "backend": "rocm",
1852
+ "backend_version": "7.0",
1853
+ "original_backend_version": "7.0.2",
1854
+ "backend_variant": "",
1855
+ "service": "vllm",
1856
+ "service_version": "0.15.0",
1857
+ "platform": "linux/amd64",
1858
+ "docker_image": "gpustack/runner:rocm7.0-vllm0.15.0",
1859
+ "deprecated": false
1860
+ },
1861
+ {
1862
+ "backend": "rocm",
1863
+ "backend_version": "7.0",
1864
+ "original_backend_version": "7.0.2",
1865
+ "backend_variant": "",
1866
+ "service": "vllm",
1867
+ "service_version": "0.14.1",
1868
+ "platform": "linux/amd64",
1869
+ "docker_image": "gpustack/runner:rocm7.0-vllm0.14.1",
1870
+ "deprecated": false
1871
+ },
1773
1872
  {
1774
1873
  "backend": "rocm",
1775
1874
  "backend_version": "7.0",
@@ -1814,6 +1913,17 @@
1814
1913
  "docker_image": "gpustack/runner:rocm7.0-vllm0.11.0",
1815
1914
  "deprecated": true
1816
1915
  },
1916
+ {
1917
+ "backend": "rocm",
1918
+ "backend_version": "6.4",
1919
+ "original_backend_version": "6.4.4",
1920
+ "backend_variant": "",
1921
+ "service": "sglang",
1922
+ "service_version": "0.5.8",
1923
+ "platform": "linux/amd64",
1924
+ "docker_image": "gpustack/runner:rocm6.4-sglang0.5.8",
1925
+ "deprecated": false
1926
+ },
1817
1927
  {
1818
1928
  "backend": "rocm",
1819
1929
  "backend_version": "6.4",
@@ -1847,6 +1957,28 @@
1847
1957
  "docker_image": "gpustack/runner:rocm6.4-sglang0.5.5.post3",
1848
1958
  "deprecated": false
1849
1959
  },
1960
+ {
1961
+ "backend": "rocm",
1962
+ "backend_version": "6.4",
1963
+ "original_backend_version": "6.4.4",
1964
+ "backend_variant": "",
1965
+ "service": "vllm",
1966
+ "service_version": "0.15.0",
1967
+ "platform": "linux/amd64",
1968
+ "docker_image": "gpustack/runner:rocm6.4-vllm0.15.0",
1969
+ "deprecated": false
1970
+ },
1971
+ {
1972
+ "backend": "rocm",
1973
+ "backend_version": "6.4",
1974
+ "original_backend_version": "6.4.4",
1975
+ "backend_variant": "",
1976
+ "service": "vllm",
1977
+ "service_version": "0.14.1",
1978
+ "platform": "linux/amd64",
1979
+ "docker_image": "gpustack/runner:rocm6.4-vllm0.14.1",
1980
+ "deprecated": false
1981
+ },
1850
1982
  {
1851
1983
  "backend": "rocm",
1852
1984
  "backend_version": "6.4",
@@ -0,0 +1,77 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CUDA_VERSION=12.8
3
+ ARG VLLM_VERSION=0.14.1
4
+ ARG SGLANG_VERSION=0.5.8
5
+
6
+ FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
7
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
8
+
9
+ ARG TARGETPLATFORM
10
+ ARG TARGETOS
11
+ ARG TARGETARCH
12
+
13
+ ## Update CuDNN and NCCL packages
14
+
15
+ RUN <<EOF
16
+ # Update CuDNN and NCCL packages
17
+
18
+ IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
19
+
20
+ # Install
21
+ cat <<EOT >/tmp/requirements.txt
22
+ nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
23
+ nvidia-cudnn-frontend>=1.17.0
24
+ nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
25
+ EOT
26
+ uv pip install \
27
+ -r /tmp/requirements.txt
28
+
29
+ # Review
30
+ uv pip tree
31
+
32
+ # Cleanup
33
+ rm -rf /var/tmp/* \
34
+ && rm -rf /tmp/*
35
+ EOF
36
+
37
+ ## Entrypoint
38
+
39
+ WORKDIR /
40
+ ENTRYPOINT [ "tini", "--" ]
41
+
42
+
43
+ FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
44
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
45
+
46
+ ARG TARGETPLATFORM
47
+ ARG TARGETOS
48
+ ARG TARGETARCH
49
+
50
+ ## Update CuDNN and NCCL packages
51
+
52
+ RUN <<EOF
53
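+ # Update CuDNN and NCCL packages (CUDA major is parsed from VLLM_TORCH_CUDA_VERSION; presumably inherited from the base image - TODO confirm it is set for SGLang images)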
54
+
55
+ IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
56
+
57
+ # Install
58
+ cat <<EOT >/tmp/requirements.txt
59
+ nvidia-cudnn-cu${CUDA_MAJOR}>=9.16.0.29
60
+ nvidia-cudnn-frontend>=1.17.0
61
+ nvidia-nccl-cu${CUDA_MAJOR}>=2.28.3
62
+ EOT
63
+ uv pip install \
64
+ -r /tmp/requirements.txt
65
+
66
+ # Review
67
+ uv pip tree
68
+
69
+ # Cleanup
70
+ rm -rf /var/tmp/* \
71
+ && rm -rf /tmp/*
72
+ EOF
73
+
74
+ ## Entrypoint
75
+
76
+ WORKDIR /
77
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,22 @@
1
+ rules:
2
+
3
+ #
4
+ # NVIDIA CUDA
5
+ #
6
+
7
+ ## Packed NVIDIA CUDA 12.9.
8
+ ##
9
+ - backend: "cuda"
10
+ services:
11
+ - "vllm"
12
+ args:
13
+ - "CUDA_VERSION=12.9"
14
+ - "VLLM_VERSION=0.15.0"
15
+ - backend: "cuda"
16
+ services:
17
+ - "vllm"
18
+ - "sglang"
19
+ args:
20
+ - "CUDA_VERSION=12.9"
21
+ - "VLLM_VERSION=0.14.1"
22
+ - "SGLANG_VERSION=0.5.8"
@@ -0,0 +1,17 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CUDA_VERSION=12.8
3
+ ARG SGLANG_VERSION=0.5.8
4
+
5
+ FROM gpustack/runner:cuda${CUDA_VERSION}-sglang${SGLANG_VERSION} AS sglang
6
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
7
+
8
+ ARG TARGETPLATFORM
9
+ ARG TARGETOS
10
+ ARG TARGETARCH
11
+
12
+ ## Environment (disable the SGLang CuDNN version check) and entrypoint
13
+
14
+ ENV SGLANG_DISABLE_CUDNN_CHECK=1
15
+
16
+ WORKDIR /
17
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,56 @@
1
+ rules:
2
+
3
+ #
4
+ # NVIDIA CUDA
5
+ #
6
+
7
+ ## Packed NVIDIA CUDA 12.9.
8
+ ##
9
+ - backend: "cuda"
10
+ services:
11
+ - "sglang"
12
+ args:
13
+ - "CUDA_VERSION=12.9"
14
+ - "VLLM_VERSION=0.14.1"
15
+ - "SGLANG_VERSION=0.5.8"
16
+
17
+ #
18
+ # AMD ROCm
19
+ #
20
+
21
+ ## Packed ROCm 7.0.
22
+ ##
23
+ - backend: "rocm"
24
+ services:
25
+ - "sglang"
26
+ platforms:
27
+ - "linux/amd64"
28
+ args:
29
+ - "ROCM_VERSION=7.0"
30
+ - "SGLANG_VERSION=0.5.8"
31
+ - backend: "rocm"
32
+ services:
33
+ - "sglang"
34
+ platforms:
35
+ - "linux/amd64"
36
+ args:
37
+ - "ROCM_VERSION=7.0"
38
+ - "SGLANG_VERSION=0.5.7"
39
+ ## Packed ROCm 6.4.
40
+ ##
41
+ - backend: "rocm"
42
+ services:
43
+ - "sglang"
44
+ platforms:
45
+ - "linux/amd64"
46
+ args:
47
+ - "ROCM_VERSION=6.4"
48
+ - "SGLANG_VERSION=0.5.8"
49
+ - backend: "rocm"
50
+ services:
51
+ - "sglang"
52
+ platforms:
53
+ - "linux/amd64"
54
+ args:
55
+ - "ROCM_VERSION=6.4"
56
+ - "SGLANG_VERSION=0.5.7"
@@ -0,0 +1,17 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG ROCM_VERSION=7.0
3
+ ARG SGLANG_VERSION=0.5.8
4
+
5
+ FROM gpustack/runner:rocm${ROCM_VERSION}-sglang${SGLANG_VERSION} AS sglang
6
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
7
+
8
+ ARG TARGETPLATFORM
9
+ ARG TARGETOS
10
+ ARG TARGETARCH
11
+
12
+ ## Environment (disable the SGLang CuDNN version check) and entrypoint
13
+
14
+ ENV SGLANG_DISABLE_CUDNN_CHECK=1
15
+
16
+ WORKDIR /
17
+ ENTRYPOINT [ "tini", "--" ]
@@ -35,3 +35,5 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
35
35
  - [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.
36
36
  - [x] 2026-01-29: Apply DP deployment patches to vLLM 0.13.0 for CUDA/ROCm released images.
37
37
  - [x] 2026-01-29: Reinstall SGLang Kernel for SGLang 0.5.7 of CANN released images.
38
+ - [x] 2026-02-03: Apply several patches (update CuDNN, CuDNN frontend and NCCL packages) to vLLM 0.14.1/0.15.0 and SGLang 0.5.8 for CUDA 12.9 released images.
39
+ - [x] 2026-02-03: Patch SGLang 0.5.8/0.5.7 of CUDA/ROCm released images to disable CuDNN version check.
@@ -59,7 +59,7 @@ ARG VLLM_VERSION=0.14.1
59
59
  ARG VLLM_ASCEND_VERSION=0.14.0rc1
60
60
  ARG VLLM_TORCH_VERSION=2.9.0
61
61
  ARG VLLM_MOONCAKE_VERSION=0.3.7.post2
62
- ARG VLLM_OMNI_COMMIT=e8aa32b
62
+ ARG VLLM_OMNI_COMMIT=de2cac9
63
63
  ARG SGLANG_BASE_IMAGE=gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-python${PYTHON_VERSION}
64
64
  ARG SGLANG_VERSION=0.5.8
65
65
  ARG SGLANG_TORCH_VERSION=2.8.0
@@ -865,6 +865,15 @@ RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
865
865
  uv pip install --no-build-isolation \
866
866
  /omni/workspace/*.whl
867
867
 
868
+ # Dependencies
869
+ uv pip uninstall onnxruntime || true
870
+ cat <<EOT >/tmp/requirements.txt
871
+ onnxruntime-cann
872
+ sox
873
+ EOT
874
+ uv pip install \
875
+ -r /tmp/requirements.txt
876
+
868
877
  # Cleanup
869
878
  rm -rf /var/tmp/* \
870
879
  && rm -rf /tmp/*
@@ -956,7 +965,11 @@ RUN --mount=type=bind,target=/workspace,rw <<EOF
956
965
 
957
966
  tree -hs /workspace/patches
958
967
  pushd $(pip show vllm | grep Location: | cut -d" " -f 2) \
959
- && patch -p1 < /workspace/patches/vllm_*.patch
968
+ && patch -p1 < /workspace/patches/vllm/*.patch
969
+ if pip show vllm_omni > /dev/null 2>&1; then \
970
+ pushd $(pip show vllm_omni | grep Location: | cut -d" " -f 2) \
971
+ && patch -p1 < /workspace/patches/vllm_omni/*.patch; \
972
+ fi
960
973
  EOF
961
974
 
962
975
  ## Entrypoint
@@ -0,0 +1,13 @@
1
+ diff --git a/vllm_omni/patch.py b/vllm_omni/patch.py
2
+ index 687ff51..6b67924 100644
3
+ --- a/vllm_omni/patch.py
4
+ +++ b/vllm_omni/patch.py
5
+ @@ -19,6 +19,8 @@ for module_name, module in sys.modules.items():
6
+ # only do patch on module of vllm, pass others
7
+ if "vllm" not in module_name:
8
+ continue
9
+ + if "--omni" not in sys.argv:
10
+ + continue
11
+ if hasattr(module, "EngineCoreOutput") and module.EngineCoreOutput == _OriginalEngineCoreOutput:
12
+ module.EngineCoreOutput = OmniEngineCoreOutput
13
+ if hasattr(module, "EngineCoreOutputs") and module.EngineCoreOutputs == _OriginalEngineCoreOutputs: