sglang 0.5.1__py3-none-any.whl → 0.5.1.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  import logging
2
2
 
3
+ import torch
4
+
3
5
  from sglang.srt.utils import get_bool_env_var, get_device_sm
4
6
 
5
7
  logger = logging.getLogger(__name__)
@@ -7,8 +9,10 @@ logger = logging.getLogger(__name__)
7
9
 
8
10
  def _compute_enable_deep_gemm():
9
11
  sm_version = get_device_sm()
10
- # TODO fix blackwell fp8
11
- if sm_version != 90:
12
+ if sm_version < 90:
13
+ return False
14
+ # TODO fix deepgemm cu129 fp8 issue
15
+ if torch.version.cuda == "12.9":
12
16
  return False
13
17
 
14
18
  try:
sglang/srt/models/grok.py CHANGED
@@ -842,10 +842,6 @@ class Grok1ForCausalLM(nn.Module):
842
842
  if self.is_weights_presharded:
843
843
  setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights)
844
844
 
845
- default_replicate_lm_head = False
846
- self.replicate_lm_head = getattr(
847
- config, "replicate_lm_head", default_replicate_lm_head
848
- )
849
845
  self.replicate_embedding = getattr(config, "replicate_embedding", False)
850
846
 
851
847
  self.model = Grok1Model(
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.5.1"
1
+ __version__ = "0.5.1.post1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sglang
3
- Version: 0.5.1
3
+ Version: 0.5.1.post1
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -374,7 +374,7 @@ Dynamic: license-file
374
374
  | [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
375
375
 
376
376
  ## News
377
- - [2025/08] 🔔 SGLang x AMD SF Meetup on 8/22: Hands-on GPU workshop, tech talks by AMD/xAI/SGLang, and networking. [Register here](https://lu.ma/gbfhjvuo).
377
+ - [2025/08] 🔔 SGLang x AMD SF Meetup on 8/22: Hands-on GPU workshop, tech talks by AMD/xAI/SGLang, and networking ([Roadmap](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_roadmap.pdf), [Large-scale EP](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_ep.pdf)).
378
378
  - [2025/08] 🔥 SGLang provides day-0 support for OpenAI gpt-oss model ([instructions](https://github.com/sgl-project/sglang/issues/8833))
379
379
  - [2025/06] 🔥 SGLang, the high-performance serving infrastructure powering trillions of tokens daily, has been awarded the third batch of the Open Source AI Grant by a16z ([a16z blog](https://a16z.com/advancing-open-source-ai-through-benchmarks-and-bold-experimentation/)).
380
380
  - [2025/06] 🔥 Deploying DeepSeek on GB200 NVL72 with PD and Large Scale EP (Part I): 2.7x Higher Decoding Throughput ([blog](https://lmsys.org/blog/2025-06-16-gb200-part-1/)).
@@ -9,7 +9,7 @@ sglang/global_config.py,sha256=ZMTux_PsGnvkyJ0kTFwhTdbnFwIjjpGDogut_9Lu4Vo,1732
9
9
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
10
10
  sglang/profiler.py,sha256=tEHzHerXC-ymk4OrkoUcMbgcGHmb8VESthsNSP2Yx9w,4417
11
11
  sglang/utils.py,sha256=dC2PNkKYTgDHsNrWdZJ74GvaXGSHCeIk_aZ-TA89OhY,16380
12
- sglang/version.py,sha256=eZ1bOun1DDVV0YLOBW4wj2FP1ajReLjbIrGmzN7ASBw,22
12
+ sglang/version.py,sha256=cOaLNeTwkPXJFU_tSZi-530rXF3qjxypENpk5W_HqGE,28
13
13
  sglang/eval/llama3_eval.py,sha256=mLNRZJIqV4CfqrY8UGnJEcHw2Xsyr1eyYZgFSUFYr1g,9997
14
14
  sglang/eval/loogle_eval.py,sha256=-CC2s2kh5qUoDrHRkQVkC_jNvBgNojXbf456ny5s78s,4557
15
15
  sglang/lang/api.py,sha256=rcp3GeoyZhmJ0GDLPRkuZNcxd0TBJy_wfUDpcmQoqW8,7210
@@ -614,7 +614,7 @@ sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a
614
614
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
615
615
  sglang/srt/layers/quantization/deep_gemm_wrapper/__init__.py,sha256=esJMd0Yuj68t6QYOpmIFuiWP2J2dxTMC4bRBNH0Xk6I,26
616
616
  sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py,sha256=FaHKgY9y1oi3zlCmkorSK4r4dhPy7NiviVIhtabvzjM,11536
617
- sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py,sha256=ajxS0OUYp2RbHtP3dB71EzsXKQEHMwP4Wo7-pkJZ9u4,771
617
+ sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py,sha256=LPO728y32uHQOGfk8U26jDIbGlAcQdD-ZYaqfboyhzM,853
618
618
  sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py,sha256=On_uvP1ThxgQFAzOWKoteM0vIOHqmtI127A4qbPtmbc,3264
619
619
  sglang/srt/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
620
620
  sglang/srt/layers/quantization/quark/quark.py,sha256=EqQvWzAc6nd_yl6UIMIwmozKif4I8fRNfBum040UUEQ,14613
@@ -728,7 +728,7 @@ sglang/srt/models/gpt_bigcode.py,sha256=1D6bi8Zu760gCRZkvdLHFcg8kCkY35ARwQYaMDtY
728
728
  sglang/srt/models/gpt_oss.py,sha256=qXa6BAm8gBTuCSX0ax3Iz4SkIIko0s1u8sVu8kJVzkY,44930
729
729
  sglang/srt/models/granite.py,sha256=8q92shxVPAp_cJDohJATffSGd7Z0Oi-vF5jpY6DlK4s,19840
730
730
  sglang/srt/models/granitemoe.py,sha256=j1rgZ62CbBioECjUblDCw_NneDQgY_QJODsI0fqXVO8,13779
731
- sglang/srt/models/grok.py,sha256=Va-kPr1XxjK4jWhVkvqzRl3e6OWuGZnbhp1TadadkeQ,40679
731
+ sglang/srt/models/grok.py,sha256=8KCR13LtdnhswJrIwgb0sdFW0OxCA8GQ0fbN8gb856E,40518
732
732
  sglang/srt/models/hunyuan.py,sha256=RSZErx-Swt65kmfvSXJQJJ6KhjucuNY4UUVLWC8hWaA,31102
733
733
  sglang/srt/models/idefics2.py,sha256=U3khd3hbdawJeRNXsxmaKHdssOCT5TPOZ1D-2_zHoQo,12079
734
734
  sglang/srt/models/internlm2.py,sha256=F_iNY1gUqzAjAuUatcE47gnrcoTh5_08PY2Rw9tKr9M,13150
@@ -862,8 +862,8 @@ sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqn
862
862
  sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
863
863
  sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
864
864
  sglang/test/attention/test_trtllm_mla_backend.py,sha256=ilrX20-1hVo0nmKN0GTqzKoUmDarTXGS8nWJ6c8RHVY,41792
865
- sglang-0.5.1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
866
- sglang-0.5.1.dist-info/METADATA,sha256=ay7jXrW9PA-uzilMp3qXaFmXLVietplAqnH1Buh4l8c,27609
867
- sglang-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
868
- sglang-0.5.1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
869
- sglang-0.5.1.dist-info/RECORD,,
865
+ sglang-0.5.1.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
866
+ sglang-0.5.1.post1.dist-info/METADATA,sha256=IgpodN0t3cCFIUqlTb5l0iNjc0obANmUVH_Wv3j5i8M,27803
867
+ sglang-0.5.1.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
868
+ sglang-0.5.1.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
869
+ sglang-0.5.1.post1.dist-info/RECORD,,