sglang 0.5.1__py3-none-any.whl → 0.5.1.post1__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +6 -2
- sglang/srt/models/grok.py +0 -4
- sglang/version.py +1 -1
- {sglang-0.5.1.dist-info → sglang-0.5.1.post1.dist-info}/METADATA +2 -2
- {sglang-0.5.1.dist-info → sglang-0.5.1.post1.dist-info}/RECORD +8 -8
- {sglang-0.5.1.dist-info → sglang-0.5.1.post1.dist-info}/WHEEL +0 -0
- {sglang-0.5.1.dist-info → sglang-0.5.1.post1.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.5.1.dist-info → sglang-0.5.1.post1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
|
|
1
1
|
import logging
|
2
2
|
|
3
|
+
import torch
|
4
|
+
|
3
5
|
from sglang.srt.utils import get_bool_env_var, get_device_sm
|
4
6
|
|
5
7
|
logger = logging.getLogger(__name__)
|
@@ -7,8 +9,10 @@ logger = logging.getLogger(__name__)
|
|
7
9
|
|
8
10
|
def _compute_enable_deep_gemm():
|
9
11
|
sm_version = get_device_sm()
|
10
|
-
|
11
|
-
|
12
|
+
if sm_version < 90:
|
13
|
+
return False
|
14
|
+
# TODO fix deepgemm cu129 fp8 issue
|
15
|
+
if torch.version.cuda == "12.9":
|
12
16
|
return False
|
13
17
|
|
14
18
|
try:
|
sglang/srt/models/grok.py
CHANGED
@@ -842,10 +842,6 @@ class Grok1ForCausalLM(nn.Module):
|
|
842
842
|
if self.is_weights_presharded:
|
843
843
|
setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights)
|
844
844
|
|
845
|
-
default_replicate_lm_head = False
|
846
|
-
self.replicate_lm_head = getattr(
|
847
|
-
config, "replicate_lm_head", default_replicate_lm_head
|
848
|
-
)
|
849
845
|
self.replicate_embedding = getattr(config, "replicate_embedding", False)
|
850
846
|
|
851
847
|
self.model = Grok1Model(
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.5.1"
|
1
|
+
__version__ = "0.5.1.post1"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.5.1
|
3
|
+
Version: 0.5.1.post1
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -374,7 +374,7 @@ Dynamic: license-file
|
|
374
374
|
| [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
|
375
375
|
|
376
376
|
## News
|
377
|
-
- [2025/08] 🔔 SGLang x AMD SF Meetup on 8/22: Hands-on GPU workshop, tech talks by AMD/xAI/SGLang, and networking. [
|
377
|
+
- [2025/08] 🔔 SGLang x AMD SF Meetup on 8/22: Hands-on GPU workshop, tech talks by AMD/xAI/SGLang, and networking ([Roadmap](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_roadmap.pdf), [Large-scale EP](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_ep.pdf)).
|
378
378
|
- [2025/08] 🔥 SGLang provides day-0 support for OpenAI gpt-oss model ([instructions](https://github.com/sgl-project/sglang/issues/8833))
|
379
379
|
- [2025/06] 🔥 SGLang, the high-performance serving infrastructure powering trillions of tokens daily, has been awarded the third batch of the Open Source AI Grant by a16z ([a16z blog](https://a16z.com/advancing-open-source-ai-through-benchmarks-and-bold-experimentation/)).
|
380
380
|
- [2025/06] 🔥 Deploying DeepSeek on GB200 NVL72 with PD and Large Scale EP (Part I): 2.7x Higher Decoding Throughput ([blog](https://lmsys.org/blog/2025-06-16-gb200-part-1/)).
|
@@ -9,7 +9,7 @@ sglang/global_config.py,sha256=ZMTux_PsGnvkyJ0kTFwhTdbnFwIjjpGDogut_9Lu4Vo,1732
|
|
9
9
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
10
10
|
sglang/profiler.py,sha256=tEHzHerXC-ymk4OrkoUcMbgcGHmb8VESthsNSP2Yx9w,4417
|
11
11
|
sglang/utils.py,sha256=dC2PNkKYTgDHsNrWdZJ74GvaXGSHCeIk_aZ-TA89OhY,16380
|
12
|
-
sglang/version.py,sha256=
|
12
|
+
sglang/version.py,sha256=cOaLNeTwkPXJFU_tSZi-530rXF3qjxypENpk5W_HqGE,28
|
13
13
|
sglang/eval/llama3_eval.py,sha256=mLNRZJIqV4CfqrY8UGnJEcHw2Xsyr1eyYZgFSUFYr1g,9997
|
14
14
|
sglang/eval/loogle_eval.py,sha256=-CC2s2kh5qUoDrHRkQVkC_jNvBgNojXbf456ny5s78s,4557
|
15
15
|
sglang/lang/api.py,sha256=rcp3GeoyZhmJ0GDLPRkuZNcxd0TBJy_wfUDpcmQoqW8,7210
|
@@ -614,7 +614,7 @@ sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a
|
|
614
614
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
615
615
|
sglang/srt/layers/quantization/deep_gemm_wrapper/__init__.py,sha256=esJMd0Yuj68t6QYOpmIFuiWP2J2dxTMC4bRBNH0Xk6I,26
|
616
616
|
sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py,sha256=FaHKgY9y1oi3zlCmkorSK4r4dhPy7NiviVIhtabvzjM,11536
|
617
|
-
sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py,sha256=
|
617
|
+
sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py,sha256=LPO728y32uHQOGfk8U26jDIbGlAcQdD-ZYaqfboyhzM,853
|
618
618
|
sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py,sha256=On_uvP1ThxgQFAzOWKoteM0vIOHqmtI127A4qbPtmbc,3264
|
619
619
|
sglang/srt/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
620
620
|
sglang/srt/layers/quantization/quark/quark.py,sha256=EqQvWzAc6nd_yl6UIMIwmozKif4I8fRNfBum040UUEQ,14613
|
@@ -728,7 +728,7 @@ sglang/srt/models/gpt_bigcode.py,sha256=1D6bi8Zu760gCRZkvdLHFcg8kCkY35ARwQYaMDtY
|
|
728
728
|
sglang/srt/models/gpt_oss.py,sha256=qXa6BAm8gBTuCSX0ax3Iz4SkIIko0s1u8sVu8kJVzkY,44930
|
729
729
|
sglang/srt/models/granite.py,sha256=8q92shxVPAp_cJDohJATffSGd7Z0Oi-vF5jpY6DlK4s,19840
|
730
730
|
sglang/srt/models/granitemoe.py,sha256=j1rgZ62CbBioECjUblDCw_NneDQgY_QJODsI0fqXVO8,13779
|
731
|
-
sglang/srt/models/grok.py,sha256=
|
731
|
+
sglang/srt/models/grok.py,sha256=8KCR13LtdnhswJrIwgb0sdFW0OxCA8GQ0fbN8gb856E,40518
|
732
732
|
sglang/srt/models/hunyuan.py,sha256=RSZErx-Swt65kmfvSXJQJJ6KhjucuNY4UUVLWC8hWaA,31102
|
733
733
|
sglang/srt/models/idefics2.py,sha256=U3khd3hbdawJeRNXsxmaKHdssOCT5TPOZ1D-2_zHoQo,12079
|
734
734
|
sglang/srt/models/internlm2.py,sha256=F_iNY1gUqzAjAuUatcE47gnrcoTh5_08PY2Rw9tKr9M,13150
|
@@ -862,8 +862,8 @@ sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqn
|
|
862
862
|
sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
|
863
863
|
sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
|
864
864
|
sglang/test/attention/test_trtllm_mla_backend.py,sha256=ilrX20-1hVo0nmKN0GTqzKoUmDarTXGS8nWJ6c8RHVY,41792
|
865
|
-
sglang-0.5.1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
866
|
-
sglang-0.5.1.dist-info/METADATA,sha256=
|
867
|
-
sglang-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
868
|
-
sglang-0.5.1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
869
|
-
sglang-0.5.1.dist-info/RECORD,,
|
865
|
+
sglang-0.5.1.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
866
|
+
sglang-0.5.1.post1.dist-info/METADATA,sha256=IgpodN0t3cCFIUqlTb5l0iNjc0obANmUVH_Wv3j5i8M,27803
|
867
|
+
sglang-0.5.1.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
868
|
+
sglang-0.5.1.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
869
|
+
sglang-0.5.1.post1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|