gpustack-runner 0.1.23.post4__tar.gz → 0.1.23.post5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/PKG-INFO +12 -4
  2. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/README.md +11 -3
  3. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/_version.py +2 -2
  4. gpustack_runner-0.1.23.post5/gpustack_runner/_version_appendix.py +1 -0
  5. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/cmds/images.py +19 -5
  6. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/runner.py +1 -1
  7. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/runner.py.json +33 -0
  8. gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/cann/Dockerfile +81 -0
  9. gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/cuda/Dockerfile +93 -0
  10. gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/matrix.yaml +78 -0
  11. gpustack_runner-0.1.23.post5/pack/.post_operation/20260105_vllm_install_omni/rocm/Dockerfile +98 -0
  12. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/README.md +1 -0
  13. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/Dockerfile +100 -15
  14. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/corex/Dockerfile +10 -7
  15. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cuda/Dockerfile +101 -7
  16. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/dtk/Dockerfile +28 -16
  17. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/expand_matrix.sh +1 -1
  18. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/maca/Dockerfile +13 -10
  19. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/matrix.yaml +24 -0
  20. gpustack_runner-0.1.23.post5/pack/musa/Dockerfile +395 -0
  21. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/rocm/Dockerfile +108 -6
  22. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +41 -0
  23. gpustack_runner-0.1.23.post4/gpustack_runner/_version_appendix.py +0 -1
  24. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.codespelldict +0 -0
  25. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.codespellrc +0 -0
  26. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.gitattributes +0 -0
  27. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.gitignore +0 -0
  28. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.pre-commit-config.yaml +0 -0
  29. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/.python-version +0 -0
  30. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/LICENSE +0 -0
  31. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/Makefile +0 -0
  32. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/docs/index.md +0 -0
  33. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/docs/modules/gpustack_runner.md +0 -0
  34. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/__init__.py +0 -0
  35. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/__main__.py +0 -0
  36. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/_version.pyi +0 -0
  37. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/cmds/__init__.py +0 -0
  38. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/cmds/__types__.py +0 -0
  39. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/gpustack_runner/envs.py +0 -0
  40. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/hatch.toml +0 -0
  41. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/mkdocs.yml +0 -0
  42. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
  43. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
  44. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
  45. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
  46. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
  47. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
  48. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
  49. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
  50. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
  51. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
  52. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
  53. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
  54. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
  55. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  56. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
  57. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
  58. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
  59. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
  60. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
  61. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
  62. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
  63. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
  64. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
  65. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
  66. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
  67. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
  68. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
  69. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
  70. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
  71. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
  72. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
  73. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
  74. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
  75. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
  76. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
  77. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
  78. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
  79. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
  80. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
  81. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
  82. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
  83. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
  84. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
  85. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
  86. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
  87. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
  88. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
  89. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
  90. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
  91. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
  92. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
  93. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
  94. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
  95. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
  96. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
  97. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  98. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
  99. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/cann/patches/mindie.zip +0 -0
  100. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/discard_runner.sh +0 -0
  101. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/merge_runner.sh +0 -0
  102. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/prune_runner.sh +0 -0
  103. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
  104. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pyproject.toml +0 -0
  105. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/pytest.ini +0 -0
  106. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/ruff.toml +0 -0
  107. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/__init__.py +0 -0
  108. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
  109. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
  110. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
  111. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
  112. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tests/gpustack_runner/test_runner.py +0 -0
  113. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/activate +0 -0
  114. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat.sh +0 -0
  115. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_current_date_time.sh +0 -0
  116. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_get_temperature.sh +0 -0
  117. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_get_weather.sh +0 -0
  118. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_square_of_number.sh +0 -0
  119. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_square_root_of_number.sh +0 -0
  120. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/chat_tool_where_am_i.sh +0 -0
  121. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/run_runner.sh +0 -0
  122. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/tools/run_runner_cluster.sh +0 -0
  123. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/uv.lock +0 -0
  124. {gpustack_runner-0.1.23.post4 → gpustack_runner-0.1.23.post5}/uv.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runner
3
- Version: 0.1.23.post4
3
+ Version: 0.1.23.post5
4
4
  Summary: GPUStack Runner is a library for registering runnable accelerated backends and services in GPUStack.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runner
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -97,9 +97,9 @@ The following table lists the supported accelerated backends and their correspon
97
97
 
98
98
  ### Hygon DTK
99
99
 
100
- | DTK Version <br/> (Variant) | vLLM |
101
- |-----------------------------|------------------|
102
- | 25.04 | `0.9.2`, `0.8.5` |
100
+ | DTK Version <br/> (Variant) | vLLM |
101
+ |-----------------------------|----------------------------|
102
+ | 25.04 | `0.11.0`, `0.9.2`, `0.8.5` |
103
103
 
104
104
  ### MetaX MACA
105
105
 
@@ -108,6 +108,13 @@ The following table lists the supported accelerated backends and their correspon
108
108
  | 3.2 | `0.10.2` |
109
109
  | 3.0 | `0.9.1` |
110
110
 
111
+ ### MThreads MUSA
112
+
113
+ | MUSA Version <br/> (Variant) | vLLM | SGLang |
114
+ |------------------------------|---------|---------|
115
+ | 4.3.2 | | `0.5.7` |
116
+ | 4.1.0 | `0.9.2` | |
117
+
111
118
  ### AMD ROCm
112
119
 
113
120
  > [!CAUTION]
@@ -171,6 +178,7 @@ ARG PYTHON_VERSION=... # REQUIRED
171
178
  ARG CMAKE_MAX_JOBS=... # REQUIRED
172
179
  ARG {OTHERS} # OPTIONAL
173
180
  ARG {BACKEND}_VERSION=... # REQUIRED
181
+ ARG {BACKEND}_VERSION_EXTRA=... # OPTIONAL
174
182
  ARG {BACKEND}_ARCHS=... # REQUIRED
175
183
  ARG {BACKEND}_{OTHERS}=... # OPTIONAL
176
184
  ARG {SERVICE}_BASE_IMAGE=... # REQUIRED
@@ -77,9 +77,9 @@ The following table lists the supported accelerated backends and their correspon
77
77
 
78
78
  ### Hygon DTK
79
79
 
80
- | DTK Version <br/> (Variant) | vLLM |
81
- |-----------------------------|------------------|
82
- | 25.04 | `0.9.2`, `0.8.5` |
80
+ | DTK Version <br/> (Variant) | vLLM |
81
+ |-----------------------------|----------------------------|
82
+ | 25.04 | `0.11.0`, `0.9.2`, `0.8.5` |
83
83
 
84
84
  ### MetaX MACA
85
85
 
@@ -88,6 +88,13 @@ The following table lists the supported accelerated backends and their correspon
88
88
  | 3.2 | `0.10.2` |
89
89
  | 3.0 | `0.9.1` |
90
90
 
91
+ ### MThreads MUSA
92
+
93
+ | MUSA Version <br/> (Variant) | vLLM | SGLang |
94
+ |------------------------------|---------|---------|
95
+ | 4.3.2 | | `0.5.7` |
96
+ | 4.1.0 | `0.9.2` | |
97
+
91
98
  ### AMD ROCm
92
99
 
93
100
  > [!CAUTION]
@@ -151,6 +158,7 @@ ARG PYTHON_VERSION=... # REQUIRED
151
158
  ARG CMAKE_MAX_JOBS=... # REQUIRED
152
159
  ARG {OTHERS} # OPTIONAL
153
160
  ARG {BACKEND}_VERSION=... # REQUIRED
161
+ ARG {BACKEND}_VERSION_EXTRA=... # OPTIONAL
154
162
  ARG {BACKEND}_ARCHS=... # REQUIRED
155
163
  ARG {BACKEND}_{OTHERS}=... # OPTIONAL
156
164
  ARG {SERVICE}_BASE_IMAGE=... # REQUIRED
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.23.post4'
31
- __version_tuple__ = version_tuple = (0, 1, 23, 'post4')
30
+ __version__ = version = '0.1.23.post5'
31
+ __version_tuple__ = version_tuple = (0, 1, 23, 'post5')
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -0,0 +1 @@
1
+ git_commit = "d297d69"
@@ -444,14 +444,14 @@ class SaveImagesSubCommand(SubCommand):
444
444
 
445
445
  command = [
446
446
  "skopeo",
447
- "--override-os",
448
- override_os,
449
- "--override-arch",
450
- override_arch,
451
447
  "copy",
452
448
  "--src-tls-verify=false",
453
449
  "--retry-times",
454
450
  str(self.max_retries),
451
+ "--override-os",
452
+ override_os,
453
+ "--override-arch",
454
+ override_arch,
455
455
  ]
456
456
  if self.source_username and self.source_password:
457
457
  command.extend(
@@ -771,6 +771,10 @@ class CopyImagesSubCommand(SubCommand):
771
771
  print(f"❌ Error syncing image '{img_name}'")
772
772
  failures.append((img_name, img_err))
773
773
 
774
+ override_os, override_arch = None, None
775
+ if self.platform:
776
+ override_os, override_arch = self.platform.split("/", maxsplit=1)
777
+
774
778
  # Submit tasks
775
779
  for img in images:
776
780
  command = [
@@ -778,10 +782,20 @@ class CopyImagesSubCommand(SubCommand):
778
782
  "copy",
779
783
  "--src-tls-verify=false",
780
784
  "--dest-tls-verify=false",
781
- "--all",
782
785
  "--retry-times",
783
786
  str(self.max_retries),
784
787
  ]
788
+ if override_os and override_arch:
789
+ command.extend(
790
+ [
791
+ "--override-os",
792
+ override_os,
793
+ "--override-arch",
794
+ override_arch,
795
+ ],
796
+ )
797
+ else:
798
+ command.append("--all")
785
799
  if self.source_username and self.source_password:
786
800
  command.extend(
787
801
  [
@@ -13,7 +13,7 @@ from dataclasses_json import dataclass_json
13
13
  from . import envs
14
14
 
15
15
  _RE_DOCKER_IMAGE = re.compile(
16
- r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
16
+ r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|musa|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
17
17
  )
18
18
  """
19
19
  Regex for Docker image parsing,
@@ -1363,6 +1363,17 @@
1363
1363
  "docker_image": "gpustack/runner:cuda12.4-voxbox0.0.20",
1364
1364
  "deprecated": true
1365
1365
  },
1366
+ {
1367
+ "backend": "dtk",
1368
+ "backend_version": "25.04",
1369
+ "original_backend_version": "25.04.2",
1370
+ "backend_variant": "",
1371
+ "service": "vllm",
1372
+ "service_version": "0.11.0",
1373
+ "platform": "linux/amd64",
1374
+ "docker_image": "gpustack/runner:dtk25.04-vllm0.11.0",
1375
+ "deprecated": false
1376
+ },
1366
1377
  {
1367
1378
  "backend": "dtk",
1368
1379
  "backend_version": "25.04",
@@ -1407,6 +1418,28 @@
1407
1418
  "docker_image": "gpustack/runner:maca3.0-vllm0.9.1",
1408
1419
  "deprecated": false
1409
1420
  },
1421
+ {
1422
+ "backend": "musa",
1423
+ "backend_version": "4.3",
1424
+ "original_backend_version": "4.3.2",
1425
+ "backend_variant": "",
1426
+ "service": "sglang",
1427
+ "service_version": "0.5.7",
1428
+ "platform": "linux/amd64",
1429
+ "docker_image": "gpustack/runner:musa4.3-sglang0.5.7",
1430
+ "deprecated": false
1431
+ },
1432
+ {
1433
+ "backend": "musa",
1434
+ "backend_version": "4.1",
1435
+ "original_backend_version": "4.1.0",
1436
+ "backend_variant": "",
1437
+ "service": "vllm",
1438
+ "service_version": "0.9.2",
1439
+ "platform": "linux/amd64",
1440
+ "docker_image": "gpustack/runner:musa4.1-vllm0.9.2",
1441
+ "deprecated": false
1442
+ },
1410
1443
  {
1411
1444
  "backend": "rocm",
1412
1445
  "backend_version": "7.0",
@@ -0,0 +1,81 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CANN_VERSION=8.3
3
+ ARG CANN_ARCHS=910b
4
+ ARG VLLM_VERSION=0.12.0
5
+ ARG VLLM_OMNI_COMMIT=75cdf1c
6
+
7
+ FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-vllm${VLLM_VERSION} AS vllm-build-omni
8
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
9
+
10
+ ARG TARGETPLATFORM
11
+ ARG TARGETOS
12
+ ARG TARGETARCH
13
+
14
+ ## Build Omni
15
+
16
+ ARG CMAKE_MAX_JOBS
17
+ ARG VLLM_OMNI_COMMIT
18
+
19
+ ENV VLLM_OMNI_COMMIT=${VLLM_OMNI_COMMIT}
20
+
21
+ RUN <<EOF
22
+ # Omni
23
+
24
+ CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
25
+ if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
26
+ CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
27
+ fi
28
+ if (( $(echo "${CMAKE_MAX_JOBS} > 4" | bc -l) )); then
29
+ CMAKE_MAX_JOBS="4"
30
+ fi
31
+ export MAX_JOBS="${CMAKE_MAX_JOBS}"
32
+ export COMPILE_CUSTOM_KERNELS=1
33
+ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CANN_HOME}/ascend-toolkit/latest/$(uname -i)-linux/devlib"
34
+ export VLLM_TARGET_DEVICE="empty"
35
+ echo "Building vLLM Omni with the following environment variables:"
36
+ env
37
+
38
+ # Build
39
+ git -C /tmp clone --recursive --shallow-submodules \
40
+ https://github.com/vllm-project/vllm-omni vllm_omni \
41
+ && pushd /tmp/vllm_omni \
42
+ && git checkout ${VLLM_OMNI_COMMIT} \
43
+ && git submodule update --init --recursive
44
+ pushd /tmp/vllm_omni \
45
+ && python -v -m build --no-isolation --wheel \
46
+ && tree -hs /tmp/vllm_omni/dist \
47
+ && mv /tmp/vllm_omni/dist /workspace
48
+
49
+ # Cleanup
50
+ rm -rf /var/tmp/* \
51
+ && rm -rf /tmp/*
52
+ EOF
53
+
54
+ FROM gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-vllm${VLLM_VERSION} AS vllm
55
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
56
+
57
+ ARG TARGETPLATFORM
58
+ ARG TARGETOS
59
+ ARG TARGETARCH
60
+
61
+ ## Install Omni
62
+
63
+ RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
64
+ # Omni
65
+
66
+ # Install
67
+ uv pip install --no-build-isolation \
68
+ /omni/workspace/*.whl
69
+
70
+ # Review
71
+ uv pip tree
72
+
73
+ # Cleanup
74
+ rm -rf /var/tmp/* \
75
+ && rm -rf /tmp/*
76
+ EOF
77
+
78
+ ## Entrypoint
79
+
80
+ WORKDIR /
81
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,93 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG CUDA_VERSION=12.8
3
+ ARG VLLM_VERSION=0.12.0
4
+ ARG VLLM_OMNI_COMMIT=75cdf1c
5
+
6
+ FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm-build-omni
7
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
8
+
9
+ ARG TARGETPLATFORM
10
+ ARG TARGETOS
11
+ ARG TARGETARCH
12
+
13
+ ARG TARGETPLATFORM
14
+ ARG TARGETOS
15
+ ARG TARGETARCH
16
+
17
+ ## Build Omni
18
+
19
+ ARG CMAKE_MAX_JOBS
20
+ ARG VLLM_OMNI_COMMIT
21
+
22
+ ENV VLLM_OMNI_COMMIT=${VLLM_OMNI_COMMIT}
23
+
24
+ RUN <<EOF
25
+ # Omni
26
+
27
+ IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${VLLM_TORCH_CUDA_VERSION}"
28
+
29
+ CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
30
+ if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
31
+ CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
32
+ fi
33
+ if (( $(echo "${CMAKE_MAX_JOBS} > 4" | bc -l) )); then
34
+ CMAKE_MAX_JOBS="4"
35
+ fi
36
+ VL_CUDA_ARCHS="${CUDA_ARCHS}"
37
+ if [[ -z "${VL_CUDA_ARCHS}" ]]; then
38
+ if (( $(echo "${CUDA_MAJOR}.${CUDA_MINOR} < 12.9" | bc -l) )); then
39
+ VL_CUDA_ARCHS="7.5 8.0+PTX 8.9 9.0 10.0+PTX 12.0+PTX"
40
+ else
41
+ VL_CUDA_ARCHS="7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX"
42
+ fi
43
+ fi
44
+ export MAX_JOBS="${CMAKE_MAX_JOBS}"
45
+ export TORCH_CUDA_ARCH_LIST="${VL_CUDA_ARCHS}"
46
+ export NVCC_THREADS=1
47
+ echo "Building vLLM Omni with the following environment variables:"
48
+ env
49
+
50
+ # Build
51
+ git -C /tmp clone --recursive --shallow-submodules \
52
+ https://github.com/vllm-project/vllm-omni vllm_omni \
53
+ && pushd /tmp/vllm_omni \
54
+ && git checkout ${VLLM_OMNI_COMMIT} \
55
+ && git submodule update --init --recursive
56
+ pushd /tmp/vllm_omni \
57
+ && python -v -m build --no-isolation --wheel \
58
+ && tree -hs /tmp/vllm_omni/dist \
59
+ && mv /tmp/vllm_omni/dist /workspace
60
+
61
+ # Cleanup
62
+ rm -rf /var/tmp/* \
63
+ && rm -rf /tmp/*
64
+ EOF
65
+
66
+ FROM gpustack/runner:cuda${CUDA_VERSION}-vllm${VLLM_VERSION} AS vllm
67
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
68
+
69
+ ARG TARGETPLATFORM
70
+ ARG TARGETOS
71
+ ARG TARGETARCH
72
+
73
+ ## Install Omni
74
+
75
+ RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
76
+ # Omni
77
+
78
+ # Install
79
+ uv pip install --no-build-isolation \
80
+ /omni/workspace/*.whl
81
+
82
+ # Review
83
+ uv pip tree
84
+
85
+ # Cleanup
86
+ rm -rf /var/tmp/* \
87
+ && rm -rf /tmp/*
88
+ EOF
89
+
90
+ ## Entrypoint
91
+
92
+ WORKDIR /
93
+ ENTRYPOINT [ "tini", "--" ]
@@ -0,0 +1,78 @@
1
+ rules:
2
+
3
+ #
4
+ # NVIDIA CUDA
5
+ #
6
+
7
+ ## Packed NVIDIA CUDA 12.9.
8
+ ##
9
+ - backend: "cuda"
10
+ services:
11
+ - "vllm"
12
+ args:
13
+ - "CUDA_VERSION=12.9"
14
+ - "VLLM_VERSION=0.12.0"
15
+ ## Packed NVIDIA CUDA 12.8.
16
+ ##
17
+ - backend: "cuda"
18
+ services:
19
+ - "vllm"
20
+ args:
21
+ - "CUDA_VERSION=12.8"
22
+ - "VLLM_VERSION=0.12.0"
23
+ ## Packed NVIDIA CUDA 12.6.
24
+ ##
25
+ - backend: "cuda"
26
+ services:
27
+ - "vllm"
28
+ args:
29
+ - "CUDA_VERSION=12.6"
30
+ - "VLLM_VERSION=0.12.0"
31
+
32
+ #
33
+ # AMD ROCm
34
+ #
35
+
36
+ ## Packed AMD ROCm 7.0.
37
+ ##
38
+ - backend: "rocm"
39
+ services:
40
+ - "vllm"
41
+ platforms:
42
+ - "linux/amd64"
43
+ args:
44
+ - "ROCM_VERSION=7.0"
45
+ - "VLLM_VERSION=0.12.0"
46
+ ## Packed AMD ROCm 6.4.
47
+ ##
48
+ - backend: "rocm"
49
+ services:
50
+ - "vllm"
51
+ platforms:
52
+ - "linux/amd64"
53
+ args:
54
+ - "ROCM_VERSION=6.4"
55
+ - "VLLM_VERSION=0.12.0"
56
+
57
+ #
58
+ # Ascend CANN
59
+ #
60
+
61
+ ## Packed Ascend CANN 8.3, using CANN Kernel for A3.
62
+ ##
63
+ - backend: "cann"
64
+ services:
65
+ - "vllm"
66
+ args:
67
+ - "CANN_VERSION=8.3"
68
+ - "CANN_ARCHS=a3"
69
+ - "VLLM_VERSION=0.12.0"
70
+ ## Packed Ascend CANN 8.3, using CANN Kernel for 910B.
71
+ ##
72
+ - backend: "cann"
73
+ services:
74
+ - "vllm"
75
+ args:
76
+ - "CANN_VERSION=8.3"
77
+ - "CANN_ARCHS=910b"
78
+ - "VLLM_VERSION=0.12.0"
@@ -0,0 +1,98 @@
1
+ ARG CMAKE_MAX_JOBS
2
+ ARG ROCM_VERSION=6.4
3
+ ARG VLLM_VERSION=0.12.0
4
+ ARG VLLM_OMNI_COMMIT=75cdf1c
5
+
6
+ FROM gpustack/runner:rocm${ROCM_VERSION}-vllm${VLLM_VERSION} AS vllm-build-omni
7
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
8
+
9
+ ARG TARGETPLATFORM
10
+ ARG TARGETOS
11
+ ARG TARGETARCH
12
+
13
+ ## Build Omni
14
+
15
+ ARG CMAKE_MAX_JOBS
16
+ ARG VLLM_OMNI_COMMIT
17
+
18
+ ENV VLLM_OMNI_COMMIT=${VLLM_OMNI_COMMIT}
19
+
20
+ RUN <<EOF
21
+ # Omni
22
+
23
+ IFS="." read -r ROCM_MAJOR ROCM_MINOR ROCM_PATCH <<< "${VLLM_TORCH_ROCM_VERSION}"
24
+ IFS="." read -r VL_MAJOR VL_MINOR VL_PATCH <<< "${VLLM_VERSION}"
25
+
26
+ CMAKE_MAX_JOBS="${CMAKE_MAX_JOBS}"
27
+ if [[ -z "${CMAKE_MAX_JOBS}" ]]; then
28
+ CMAKE_MAX_JOBS="$(( $(nproc) / 2 ))"
29
+ fi
30
+ if (( $(echo "${CMAKE_MAX_JOBS} > 4" | bc -l) )); then
31
+ CMAKE_MAX_JOBS="4"
32
+ fi
33
+ VL_ROCM_ARCHS="${ROCM_ARCHS}"
34
+ if [[ -z "${VL_ROCM_ARCHS}" ]]; then
35
+ if (( $(echo "${ROCM_MAJOR}.${ROCM_MINOR} < 7.0" | bc -l) )); then
36
+ VL_ROCM_ARCHS="gfx908;gfx90a;gfx942;gfx1030;gfx1100"
37
+ if (( $(echo "${VL_MAJOR}.${VL_MINOR} == 0.13" | bc -l) )); then
38
+ # TODO(thxCode): Temporarily remove gfx1030 for vLLM ROCm build due to build error in ROCm 6.4.4.
39
+ # #15 134.9 /tmp/vllm/build/temp.linux-x86_64-cpython-312/csrc/sampler.hip:564:63: error: local memory (66032) exceeds limit (65536) in 'void vllm::topKPerRowDecode<1024, true, false, true>(float const*, int const*, int*, int, int, int, int, float*, int, int const*)'
40
+ # ##15 134.9 564 | static __global__ __launch_bounds__(kNumThreadsPerBlock) void topKPerRowDecode(
41
+ # ##15 134.9 | ^
42
+ # ##15 134.9 16 warnings and 1 error generated when compiling for gfx1030.
43
+ VL_ROCM_ARCHS="gfx908;gfx90a;gfx942"
44
+ fi
45
+ else
46
+ VL_ROCM_ARCHS="gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151"
47
+ fi
48
+ fi
49
+ export MAX_JOBS="${CMAKE_MAX_JOBS}"
50
+ export COMPILE_CUSTOM_KERNELS=1
51
+ export PYTORCH_ROCM_ARCH="${VL_ROCM_ARCHS}"
52
+ echo "Building vLLM Omni with the following environment variables:"
53
+ env
54
+
55
+ # Build
56
+ git -C /tmp clone --recursive --shallow-submodules \
57
+ https://github.com/vllm-project/vllm-omni vllm_omni \
58
+ && pushd /tmp/vllm_omni \
59
+ && git checkout ${VLLM_OMNI_COMMIT} \
60
+ && git submodule update --init --recursive
61
+ pushd /tmp/vllm_omni \
62
+ && python -v -m build --no-isolation --wheel \
63
+ && tree -hs /tmp/vllm_omni/dist \
64
+ && mv /tmp/vllm_omni/dist /workspace
65
+
66
+ # Cleanup
67
+ rm -rf /var/tmp/* \
68
+ && rm -rf /tmp/*
69
+ EOF
70
+
71
+ FROM gpustack/runner:rocm${ROCM_VERSION}-vllm${VLLM_VERSION} AS vllm
72
+ SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
73
+
74
+ ARG TARGETPLATFORM
75
+ ARG TARGETOS
76
+ ARG TARGETARCH
77
+
78
+ ## Install Omni
79
+
80
+ RUN --mount=type=bind,from=vllm-build-omni,source=/,target=/omni,rw <<EOF
81
+ # Omni
82
+
83
+ # Install
84
+ uv pip install --no-build-isolation \
85
+ /omni/workspace/*.whl
86
+
87
+ # Review
88
+ uv pip tree
89
+
90
+ # Cleanup
91
+ rm -rf /var/tmp/* \
92
+ && rm -rf /tmp/*
93
+ EOF
94
+
95
+ ## Entrypoint
96
+
97
+ WORKDIR /
98
+ ENTRYPOINT [ "tini", "--" ]
@@ -32,3 +32,4 @@ We leverage the matrix expansion feature of GPUStack Runner to achieve this, and
32
32
  - [x] 2025-12-19: Install `vLLM[audio]` packages for vLLM 0.12.0/0.11.2 of CUDA/ROCm released images.
33
33
  - [x] 2025-12-19: Install `petit-kernel` package for vLLM 0.12.0/0.11.2 and SGLang 0.5.6.post2/0.5.5.post3 of ROCm released images.
34
34
  - [x] 2025-12-24: Apply ATB config patches to MindIE 2.2.rc1 for CANN released images.
35
+ - [ ] 2026-01-05: Install `vllm-omni` packages for vLLM 0.12.0 of CUDA/ROCm/CANN released images.