sglang 0.4.9__py3-none-any.whl → 0.4.9.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. sglang/bench_serving.py +2 -2
  2. sglang/srt/configs/model_config.py +12 -1
  3. sglang/srt/conversation.py +35 -1
  4. sglang/srt/disaggregation/mooncake/conn.py +35 -4
  5. sglang/srt/entrypoints/http_server_engine.py +1 -1
  6. sglang/srt/layers/communicator.py +3 -1
  7. sglang/srt/layers/flashinfer_comm_fusion.py +3 -3
  8. sglang/srt/layers/layernorm.py +2 -2
  9. sglang/srt/layers/moe/cutlass_w4a8_moe.py +215 -0
  10. sglang/srt/layers/moe/ep_moe/kernels.py +58 -0
  11. sglang/srt/layers/moe/ep_moe/layer.py +140 -2
  12. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +2 -0
  13. sglang/srt/layers/moe/fused_moe_triton/layer.py +135 -58
  14. sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +176 -0
  15. sglang/srt/layers/quantization/__init__.py +2 -0
  16. sglang/srt/layers/quantization/fp8.py +28 -7
  17. sglang/srt/layers/quantization/modelopt_quant.py +244 -1
  18. sglang/srt/layers/quantization/w4afp8.py +264 -0
  19. sglang/srt/layers/vocab_parallel_embedding.py +9 -3
  20. sglang/srt/lora/triton_ops/gate_up_lora_b.py +30 -19
  21. sglang/srt/lora/triton_ops/qkv_lora_b.py +30 -19
  22. sglang/srt/lora/triton_ops/sgemm_lora_a.py +27 -11
  23. sglang/srt/lora/triton_ops/sgemm_lora_b.py +27 -15
  24. sglang/srt/managers/cache_controller.py +41 -195
  25. sglang/srt/managers/io_struct.py +8 -1
  26. sglang/srt/managers/mm_utils.py +4 -2
  27. sglang/srt/managers/schedule_batch.py +1 -1
  28. sglang/srt/managers/scheduler.py +17 -5
  29. sglang/srt/mem_cache/hiradix_cache.py +2 -0
  30. sglang/srt/mem_cache/memory_pool.py +113 -63
  31. sglang/srt/mem_cache/memory_pool_host.py +6 -109
  32. sglang/srt/mem_cache/radix_cache.py +8 -4
  33. sglang/srt/models/deepseek_v2.py +16 -2
  34. sglang/srt/models/mllama4.py +360 -79
  35. sglang/srt/multimodal/mm_utils.py +2 -2
  36. sglang/srt/multimodal/processors/mllama4.py +62 -60
  37. sglang/srt/server_args.py +15 -0
  38. sglang/srt/two_batch_overlap.py +3 -0
  39. sglang/srt/utils.py +37 -17
  40. sglang/test/test_cutlass_w4a8_moe.py +281 -0
  41. sglang/utils.py +5 -5
  42. sglang/version.py +1 -1
  43. {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/METADATA +4 -3
  44. {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/RECORD +47 -43
  45. {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/WHEEL +0 -0
  46. {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/licenses/LICENSE +0 -0
  47. {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/top_level.txt +0 -0
@@ -3,14 +3,14 @@ sglang/api.py,sha256=rcp3GeoyZhmJ0GDLPRkuZNcxd0TBJy_wfUDpcmQoqW8,7210
3
3
  sglang/bench_offline_throughput.py,sha256=TwgXZYmwPaHVsdPtNU9LO0p1tr5OOKLy9wYgrfGAlFU,14056
4
4
  sglang/bench_one_batch.py,sha256=RJsXQ7pvq8y4yzW7svURtWZF5RiawxEMrsjV0OcKsrk,19388
5
5
  sglang/bench_one_batch_server.py,sha256=LS1BRmFwP67IpYlU5wmkWjqquiA5drvtWe-fwHnFSKI,14170
6
- sglang/bench_serving.py,sha256=sQKEgJZIfHRu3vNh8QWqIrTQzOfUMaoVidG05ewTVUE,71744
6
+ sglang/bench_serving.py,sha256=nOjDnqOKDezDvtCvEhC1_FAoWVkir38bSaEIcSGGVZg,71748
7
7
  sglang/check_env.py,sha256=qDMIG2rCNBH1yKnxQmF-Bp10oiFMUKMgfZLHZYOmdSY,8412
8
8
  sglang/compile_deep_gemm.py,sha256=H118s76CKdpZr-cDeFfBCePe7--c_teEBNVIzchYVSo,6243
9
9
  sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
10
10
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
11
11
  sglang/profiler.py,sha256=tEHzHerXC-ymk4OrkoUcMbgcGHmb8VESthsNSP2Yx9w,4417
12
- sglang/utils.py,sha256=VH6zrnkjzcR3DE__WfVph6wswJ4JuzoQD47VmbZ38eI,16435
13
- sglang/version.py,sha256=LdxLMJM_JXsCQBeSvnxCNyGWmINE0yWfna3DQaT41Vs,22
12
+ sglang/utils.py,sha256=leKs-YyX1_Jk216yoKX-KRYx_EbLiuemsHbD31xSjMw,16445
13
+ sglang/version.py,sha256=zFQM77wgAmWBTKJkmd0BFXV3JGA69nyLLIJtFnFJo6Q,28
14
14
  sglang/eval/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
15
15
  sglang/eval/loogle_eval.py,sha256=pRPVA4fxGmT3_oXvXnlNE-UlPrcQGLBJF-OSE9YWJXM,4336
16
16
  sglang/lang/chat_template.py,sha256=HKlx7snSWFED8GKF5ex79sQrPWFw5TSXQM0_LsiD9Bc,20552
@@ -30,7 +30,7 @@ sglang/srt/_custom_ops.py,sha256=0lJRMTKTjoxJPh1qQnnMY02Z3SyBDi7LJI34IBLQsgQ,446
30
30
  sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
31
31
  sglang/srt/code_completion_parser.py,sha256=KFa95OU0TeVjJkOqIgS4xV3kaJ5dFWOmAAgISyc1oEc,3803
32
32
  sglang/srt/constants.py,sha256=0i-tEwG2BSYNDy96MxnGHV5HnBELkYcnsVGsE-R18o0,93
33
- sglang/srt/conversation.py,sha256=fFP6RxkRolz-YLl55o0juzp0Ov-78j89RCt7Cflzr2Q,38335
33
+ sglang/srt/conversation.py,sha256=tu6MggZEhA4e3JJrgvXr44f3W6euDubLLnO5LfFbY6o,39588
34
34
  sglang/srt/custom_op.py,sha256=87r2PIgiGLREsIZQ8qsUD-zgI66_54y9GrE0buXzoCI,3076
35
35
  sglang/srt/debug_utils.py,sha256=slaFOY4BYDBFatkfu8FZlzai-u4LFS-5GUzdr-t50zE,2241
36
36
  sglang/srt/hf_transformers_utils.py,sha256=1Ku6POAyk1Hb4Q1VcHTE_2EjujVhPerPO9V9NAcHbOI,11997
@@ -40,10 +40,10 @@ sglang/srt/operations.py,sha256=ddQ8KO63L73OciaR8MZ9h2h83gKVY4-WuWgeEGowPJA,5346
40
40
  sglang/srt/operations_strategy.py,sha256=Pwd2sKeRtKh9WJXgzlNr2tU9y6YMcI3MDLbatHqlMws,7145
41
41
  sglang/srt/patch_torch.py,sha256=OUPCGQSQz3MVZB1zZ_Eq8lXiw0uIKJ_HWjqQolI8FsM,3088
42
42
  sglang/srt/reasoning_parser.py,sha256=DkZrFhkJ9qe7w46fTnM9sqhXwTDgZxD4qPdGka-Dly8,7579
43
- sglang/srt/server_args.py,sha256=Fg1CqO92iiKA5jLd9vqKZ59uwnw0l23DO6KL47kBJgY,75458
43
+ sglang/srt/server_args.py,sha256=DQO3e_Dxi3A3ZPApBtDeAulirZ15wapNa4y-HuOUxIM,75997
44
44
  sglang/srt/torch_memory_saver_adapter.py,sha256=K_eTx0UU84MHSTXI3iqYLdHV4IWtJMJ2FKdGFJR8v1E,2417
45
- sglang/srt/two_batch_overlap.py,sha256=kunhMhCF_ucK4EkthOUEUegd1C9j4aSbpBLYaIKYgas,28447
46
- sglang/srt/utils.py,sha256=pI-0YtHmQrC2B2Q_uK13qvje1y_ni6TNzqs8W7T3hts,85846
45
+ sglang/srt/two_batch_overlap.py,sha256=Pqqj4BzVbXCiOvWE20MGMo21AoAmEPd0R32DsPfD5c8,28562
46
+ sglang/srt/utils.py,sha256=CMLibLUA95EYzGbHKj53jBnC3tpIO6eKLT1RLd__4bU,86841
47
47
  sglang/srt/warmup.py,sha256=zldxhMlXpclRAJXmfBjJNUJd1eDizVdysibBvQyTVuA,1782
48
48
  sglang/srt/configs/__init__.py,sha256=8EcVRP95epZ49DxBa6LgKWt7eO3Qe7Hrr3V1c6HkMnY,553
49
49
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
@@ -56,7 +56,7 @@ sglang/srt/configs/janus_pro.py,sha256=95qLFQ43n-q5MVEDnh9OBXCUNXOCofwBKYvd7LKPv
56
56
  sglang/srt/configs/kimi_vl.py,sha256=4W7VQI3pr888ZsFA2SqCQo4mI0seXTOrGQ-x3oTvWew,1358
57
57
  sglang/srt/configs/kimi_vl_moonvit.py,sha256=hx2Rt4JSFbvy2HUTeLjBpge87m8M6ITAhqsgdNf_Jd4,1163
58
58
  sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
59
- sglang/srt/configs/model_config.py,sha256=EzF91fOMu97BxPFSrVU4r-RTuUS3pqoDObKAstnPxGE,26983
59
+ sglang/srt/configs/model_config.py,sha256=7PWC4qdUxKjOo47It1kNhaEQQwsd3CYWYvcv-oP0NXE,27517
60
60
  sglang/srt/configs/update_config.py,sha256=2EpDVocEpMv35g1V-iPLSaLjBgylC5vN0yYSlW49k5w,4664
61
61
  sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
62
62
  sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
@@ -89,7 +89,7 @@ sglang/srt/disaggregation/common/utils.py,sha256=SxRhAWisNK8seGhb5BXBJ5u53DF7yeK
89
89
  sglang/srt/disaggregation/fake/__init__.py,sha256=jJGWdXwaQiGIoR6atKqkQfkJmVyQ09l55VUN2WjwaeY,77
90
90
  sglang/srt/disaggregation/fake/conn.py,sha256=oD1DArn1yDFZCu-X6p93uSLlAXEkt9lYxERICMznxGw,2286
91
91
  sglang/srt/disaggregation/mooncake/__init__.py,sha256=0TgqkAdQI1YynbHY6c0QISvVoOSk-0SwCIq5rjPSmgE,156
92
- sglang/srt/disaggregation/mooncake/conn.py,sha256=MATJkiS_5Vh8fc8Gx6S6cI3zCYDbgKXVEAa1xkUsCKA,58879
92
+ sglang/srt/disaggregation/mooncake/conn.py,sha256=EDINLcY3AoYihlu4ZMe__AJuGxCepaNMggU35-RENeY,60983
93
93
  sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=JYB9T-EPdJNfv4I_sVpmMOZCOJ14itD97ws6tTvj240,4281
94
94
  sglang/srt/disaggregation/nixl/__init__.py,sha256=qODVPIGWUXKXq4zsRIcMYoAoAeg6nBIN9vdQOlVMANE,136
95
95
  sglang/srt/disaggregation/nixl/conn.py,sha256=cXqWU2Gbi6E2LS0MznEcgFCrwONhYxBtD-zE9DU32z0,20333
@@ -110,7 +110,7 @@ sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132Bv
110
110
  sglang/srt/entrypoints/EngineBase.py,sha256=yKN76witT2jz1zhmLHmPNLGMpK2UiOTaKQ2KPD8l99U,2594
111
111
  sglang/srt/entrypoints/engine.py,sha256=o6sccP-gdeO6rOY9vbSqVmUNlBrK2YsinrDf42URg5A,31328
112
112
  sglang/srt/entrypoints/http_server.py,sha256=7q6TK02vkppIrW_oa_Xxhr-EV3SNaAwAt_pcnLNfC8w,37503
113
- sglang/srt/entrypoints/http_server_engine.py,sha256=ncN45ti9mawSOimPSedI6zugfoMhMQOYh4tmdfC9LcE,4936
113
+ sglang/srt/entrypoints/http_server_engine.py,sha256=_--j4U04OeJLlnnv1f0XmCd_Ry0z1FlhkrbePX8rYV0,4938
114
114
  sglang/srt/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
115
  sglang/srt/entrypoints/openai/protocol.py,sha256=qIwhokAlAVbp7Nin1eE4TCnUyrAhKcPS47kzHkhzZg4,18799
116
116
  sglang/srt/entrypoints/openai/serving_base.py,sha256=5NJ2S_6B2NFSwn4nLp6eaeJ5iC3IcQzMEY9lW_gPcdA,5246
@@ -144,11 +144,11 @@ sglang/srt/function_call/qwen25_detector.py,sha256=9JfZem_5nw91Og2biwq8eIpUQjy_3
144
144
  sglang/srt/function_call/utils.py,sha256=__ImDF2kNyoLWsYO5RYoryvy1mmgEjnjXlCvLv-uLCM,1695
145
145
  sglang/srt/layers/activation.py,sha256=UDrJcnQoIOJjMzZ9XAy5JlEJ-do44tGMtcitY7I8kPQ,7327
146
146
  sglang/srt/layers/amx_utils.py,sha256=1mENgHK2B8mgaD1oMtgbZ15Jmy_Uu1QueBmo09Ff2iA,2865
147
- sglang/srt/layers/communicator.py,sha256=_aq6rDgDlRRVXO1EquSNtmMADuQJGdRZkbt0vCxTxqo,19314
147
+ sglang/srt/layers/communicator.py,sha256=eDa24tQMds3YTd1Xh4BqFpLBHdw9iF23aaoctiygOuc,19546
148
148
  sglang/srt/layers/dp_attention.py,sha256=V7m_PfvHHtRMJMviIqmoyqf5VFFJLYYcynDuI5dXtAU,10275
149
149
  sglang/srt/layers/elementwise.py,sha256=MyQUflyKEfPZ-BggW1Kd4hB53RFD6FXGc2S5LXjx_do,16026
150
- sglang/srt/layers/flashinfer_comm_fusion.py,sha256=IwI3VPHVZaWTgf_rsSH_5QROTcmoWnjlZ9annh7cYRs,5888
151
- sglang/srt/layers/layernorm.py,sha256=4KGaB4QSO5hBcr26TtPfspD3XZwr5X55Pkn5xN9eJrQ,8647
150
+ sglang/srt/layers/flashinfer_comm_fusion.py,sha256=fkTcAB7qYwSWi95qI3Rqq0JUyDpJdcYkd4TYkWO01HI,5891
151
+ sglang/srt/layers/layernorm.py,sha256=ooqA-t-vY5erbKBwqnOotfDsJRTygP5E10CfzEAVF6M,8657
152
152
  sglang/srt/layers/linear.py,sha256=cxth3pvLGdud-ZoHrGFhrPeuhLnBX4IWohyBr2gCCv0,54890
153
153
  sglang/srt/layers/logits_processor.py,sha256=tOPMYopEQuoLBzwbjSmU9OWFYh2mmy3ViJSbA_wpFOU,25619
154
154
  sglang/srt/layers/multimodal.py,sha256=YVR69WW-2aGDcZHT8IVJ6F_LRM7wraZr8VjrPDXqDmA,2104
@@ -159,7 +159,7 @@ sglang/srt/layers/rotary_embedding.py,sha256=EhxI0E8jcTWZ2COpnku7crbW8Hew5fe_ujM
159
159
  sglang/srt/layers/sampler.py,sha256=xNds1migup2s6b9_pS6ljkJUkvNtv7nmTGeIdOzoQ6w,11182
160
160
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
161
161
  sglang/srt/layers/utils.py,sha256=IWGg1Hb7c33Z3LHRPVJyUAzp3BnSid23ZWXAmJ_Jvp8,1204
162
- sglang/srt/layers/vocab_parallel_embedding.py,sha256=1Av2YHpx-Oa18-33ps0lpkOuUgnY-Y--Yt_I7YcpYBU,23438
162
+ sglang/srt/layers/vocab_parallel_embedding.py,sha256=i1Uoo8TE877jLNjAjwoyMD36W7J3WwFD402YWq0qTT0,23560
163
163
  sglang/srt/layers/attention/aiter_backend.py,sha256=7sEUgViw-xl3yok91yyOD9gTi8lQmME0g0ZiKVTCcyI,32851
164
164
  sglang/srt/layers/attention/ascend_backend.py,sha256=jPCsU9_gH1iZNoZHD9nCeDdVdXqBt31LI65N55BTJPg,8250
165
165
  sglang/srt/layers/attention/base_attn_backend.py,sha256=KXVcCguwXh-PSrY9Y2aUrlXXUhWdbVxqVEF2_xIMvm4,3466
@@ -184,16 +184,18 @@ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=waZsmpKIp8rTg
184
184
  sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
185
185
  sglang/srt/layers/moe/cutlass_moe.py,sha256=--bNTA2BGbHeULb_XXDoRUyWWbE-doHo5K5k1T0N0WA,14323
186
186
  sglang/srt/layers/moe/cutlass_moe_params.py,sha256=9NRCmgP_Ug3gGqCcpi-x-QRbLjCNpw8792gKXwZsbEU,6522
187
+ sglang/srt/layers/moe/cutlass_w4a8_moe.py,sha256=bc8s5Oc36pCF0VozxZuqFOOU2fov-4wGPlyduye3Imk,7296
187
188
  sglang/srt/layers/moe/fused_moe_native.py,sha256=bW3KWxxz9rxKMUQqfmAtF-7ptTODA1pwLydE05ABDJE,5030
188
189
  sglang/srt/layers/moe/router.py,sha256=UrPieRvemN7Ew48gtG7DA2xhNDBRSnZxzugTEBI-0_E,12006
189
190
  sglang/srt/layers/moe/topk.py,sha256=DhG8EIqyhn41erCKRwX2wGrYOOF-1VdBrCZwyyc1Ks0,18048
190
191
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
191
- sglang/srt/layers/moe/ep_moe/kernels.py,sha256=bHn50WQ9K57wLxLWZ8ytNJrCi7bAQ6MegN2o7qxB_Mc,40690
192
- sglang/srt/layers/moe/ep_moe/layer.py,sha256=DjUSrRAiRzvuDRvF3JyTqupL3z7smzwXO9hQpQOK0Io,56658
192
+ sglang/srt/layers/moe/ep_moe/kernels.py,sha256=FhQAv9JQncMy8luzULWbJtH3MxCAf3tHUQU-mAJlPlM,42543
193
+ sglang/srt/layers/moe/ep_moe/layer.py,sha256=hqeNZjqx6aq2XGTZLkR9j9ITwdetr1mI85vWPo_1xSo,62480
193
194
  sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=aiBE3mjvWV3eBrFGH9J44tuJncQwOjRS_XeyBNCEtqM,24379
194
195
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
195
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=GQk0WVdPYI1lOsa0ItkiYcGHS_k8r5y7Mfy485MBZl8,63177
196
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=wsNUtzqE2Emi0RAwVzpFHUOVOqUMXr3pCMSJHqkc7EE,34126
196
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=Ffcu3Jvp7-nllybh5EWtG0MamWI9PqO8a6ESiMKWEB0,63292
197
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=o6WDXu6maDZhVGu75Qh0nVkPgrMLgJvLp7npyCHPjUg,37357
198
+ sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py,sha256=hoWVdf8ry8IDGH2bfk2WW-y2S5h5haLTGanBSwkkeE0,5848
197
199
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
198
200
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
199
201
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -356,22 +358,23 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=wsNUtzqE2Emi0RAwVzpFHUOVO
356
358
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LNx1W_BsWZcpHomiScCRap46dV0-F7S_w3Htskoqlm8,3263
357
359
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=fo1akCuGoqcGwQgfh56hApgg-wLXfo9kHHksE_6m1F4,3262
358
360
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=a9XJZ0XudUTebAdN5guVq-3XSvZetHIRjatO1-_0xCk,3257
359
- sglang/srt/layers/quantization/__init__.py,sha256=isMl8LINJFMzEse4r2Ta_7JXT15eKCX4mDIikuwticM,12559
361
+ sglang/srt/layers/quantization/__init__.py,sha256=JHV4X982TW7_U7ZobQwNIdK-Z5saEH-R1xKWrF6jgRg,12650
360
362
  sglang/srt/layers/quantization/awq.py,sha256=KemDG55U3B6YZVjMV71awVAIj0islFvtxcUHmOBeGy0,6739
361
363
  sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScLFjB5_sywFlAfE5J0,4776
362
364
  sglang/srt/layers/quantization/blockwise_int8.py,sha256=vWyPZsRLhdKtSmjvlT5fsowBK_nEebYbDAUh2yqseGw,15285
363
- sglang/srt/layers/quantization/fp8.py,sha256=fPzEIAYqiSgVjVTu6_CceyW-kemzSRGS_Q-FZ0eGAD0,46375
365
+ sglang/srt/layers/quantization/fp8.py,sha256=X97n8M7Kqhv98JvVQsSW6oYJtI2Wwrnoix-LMtsaxu4,47468
364
366
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=vziHnk-gVbeY3z94216UDCU5kyev36ZrKldMgglSmpw,34718
365
367
  sglang/srt/layers/quantization/fp8_utils.py,sha256=fsX1x17RwcK88N1WpF-YELQOpkpHvJnoLriZj27H2R8,25611
366
368
  sglang/srt/layers/quantization/gptq.py,sha256=B0J14a-OANM2uglnatukFMlvpDnUprV3-p_l4308WUQ,26838
367
369
  sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRC9FOn9exNvK4QHbUeBj3Hhv32VcyGphapFPt5b84,12625
368
370
  sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
369
371
  sglang/srt/layers/quantization/kv_cache.py,sha256=_9pF5rwvB7ta6Gdc5YKVVGbNzYwqmhIx4TrX1-xnodQ,3261
370
- sglang/srt/layers/quantization/modelopt_quant.py,sha256=wW90ZS1ZrGOXf7l5svudmXqAi3JE2SkCV-fwqWeYfdg,30905
372
+ sglang/srt/layers/quantization/modelopt_quant.py,sha256=0abOZfOyGYOQFiyMssZPey1YekQrAQBcMp-GQlrFXZY,40746
371
373
  sglang/srt/layers/quantization/moe_wna16.py,sha256=9w3TYF1aQ0N0zNBgcuyOX-z-zfq4it2ul_MjKk_fPCA,19680
372
374
  sglang/srt/layers/quantization/qoq.py,sha256=adhsCixRUTh1m8sfpRWNhh5oX3I1WJli3Elg8uexP6A,8222
373
375
  sglang/srt/layers/quantization/quant_utils.py,sha256=kBibCBK9DCambuPy2atB82Bmalb-Vs_wbQoVHT7h2Lw,5089
374
376
  sglang/srt/layers/quantization/utils.py,sha256=qHdWbLQD8teKhv2tOcuyGegpFonXygoJHwCAD7Ur5MA,5439
377
+ sglang/srt/layers/quantization/w4afp8.py,sha256=Rf8wMqtGUgLF8soOM16owfFI0AR1q1Ylzr5YpcdqyxU,9538
375
378
  sglang/srt/layers/quantization/w8a8_fp8.py,sha256=ZB6ydquyPOORDe9OCoEBRH6dsQhCeRWv1YIgm8UDQwQ,11622
376
379
  sglang/srt/layers/quantization/w8a8_int8.py,sha256=aPMm-sk5nBpHwHzkedvuAWXiqWpp_NPyVaPHxHnuhVg,10847
377
380
  sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -548,20 +551,20 @@ sglang/srt/lora/backend/base_backend.py,sha256=EIz8I-GIrdmK4fISw3ENhbJVVITaxKfyL
548
551
  sglang/srt/lora/backend/flashinfer_backend.py,sha256=el6IAB4kTgDTbwCggmqFuukliyoapN5X6FLksG-4wJ8,4151
549
552
  sglang/srt/lora/backend/triton_backend.py,sha256=uqwBGlguXX8EkCKjSPqac6SPTgcKA31u3u6HkKPQcos,2530
550
553
  sglang/srt/lora/triton_ops/__init__.py,sha256=JGOYPIn1XbGcyJTbt8A0qoc02PYONSGNNjGkC8yJpAM,283
551
- sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=vOUDzC_Sq7UGnDbdF8F7ChKG12hDIOFwwMb7e62Lz9w,5280
552
- sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=qCpJAksCSXS3Znm-ABx1QfrdNqj9hrP61oaeW-n2RhU,5945
553
- sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=yNBgl8jS76DPlGJg8puRkSnKkewf2_2oKfVZZRq4lYY,4351
554
- sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=EoF9HymgQ1V9DwyOJOvbw1hCbEjprVOne_fC4VHgj-s,4518
555
- sglang/srt/managers/cache_controller.py,sha256=K00t5dSZU6gbAMHZWhNyj0ndgHISvelTl7UMI5KqBuE,19352
554
+ sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=QURCYxHNR8Ls4SQtt3dvdgjvdDVhywI9tOzsK8SV9m8,5779
555
+ sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=iz5scFNo2YFTeiM8beWg9Z1oZI-6AM_T1wBMCQ6qp2Q,6485
556
+ sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=Ksova04wgeGsFqGOXWqJtMYaHgyUYcx8VU42BZQOkVA,5129
557
+ sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=VqCAFvUtq_l-0RGIkx3W_fzD55QcW2FAcqpvSHOBFp4,5115
558
+ sglang/srt/managers/cache_controller.py,sha256=HPQGLwjey6GeW60Vu6MVeLYtRsJrh99_-dqHYSzlBkE,13614
556
559
  sglang/srt/managers/configure_logging.py,sha256=8sNXZ2z9pBWOwn-X3wyz013Ob8Nbm1zDxRkxoZjH-l4,1633
557
560
  sglang/srt/managers/data_parallel_controller.py,sha256=PZ-wOcAFn3PQqMB6I2vjIsFmplf0nlNl5hqTDKTHTG8,12112
558
561
  sglang/srt/managers/detokenizer_manager.py,sha256=SpLxTsSPKBZfD-ZMhJ5zpPPGuUb8PmcYgFSL9CsurU4,10696
559
- sglang/srt/managers/io_struct.py,sha256=DQ2zg8InwXHgOa06n47rbwTzR5mYkj4OGQe5rZ0T77k,35137
560
- sglang/srt/managers/mm_utils.py,sha256=px_pMotzBLhuqw-mLXXzf8N1DthhDn9ILsIVIzW9y74,26582
562
+ sglang/srt/managers/io_struct.py,sha256=gGyTIY3-OhUg2UgkvSxYWiDcc2S9FMJsDJuj182fTNY,35534
563
+ sglang/srt/managers/mm_utils.py,sha256=Dqrk5wG3TDVBz4eZV77L9DmUZFjbz0TqF5ASyMhF2lo,26783
561
564
  sglang/srt/managers/multimodal_processor.py,sha256=mzCrN-8H0bE0iMO8UzxmYmhE2M1qsbVJXGdhAYcjjYA,2016
562
- sglang/srt/managers/schedule_batch.py,sha256=3NF590tt4TJ99svwdrlsrFiICH_8X2DpSOj1ebEUSC8,73119
565
+ sglang/srt/managers/schedule_batch.py,sha256=FdNt7oe9Y-Akfwk4yJ3jNDhd8i1-aO5bHkC0-3IvESU,73114
563
566
  sglang/srt/managers/schedule_policy.py,sha256=0T8URzQmLvEmG-42-SFBBl9WnsOSwYO8-_CcBpuD38M,20474
564
- sglang/srt/managers/scheduler.py,sha256=ZC49CVTsVlDh77QCq5UPhy04NM9FV5YHXuMg4M0cwEE,112191
567
+ sglang/srt/managers/scheduler.py,sha256=w0U6_NTGiq81wQ1cd_SwcrkeloxARKnqwufdWe61k-4,112727
565
568
  sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=XAK2aeLleZBaLtzebEKdpaz9E7uKnHV7ywHSk_WPDYo,30148
566
569
  sglang/srt/managers/session_controller.py,sha256=dzlMNZlo20FTSl64QqK7y7pElsdCy8ICOWWBPTBVwgs,6040
567
570
  sglang/srt/managers/template_manager.py,sha256=RrwRA2oqId_PMQ98qJQGwIxMroOxiorl2sGC9ARou_0,8543
@@ -574,11 +577,11 @@ sglang/srt/mem_cache/allocator.py,sha256=MJUPQt4ECj4ReGeUVGVUNfwrwjHBbf3youdkyp3
574
577
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=XHSzXKgBin-m1HsL47K-GobhLnajaGxqLqGtrLsWaZ0,2540
575
578
  sglang/srt/mem_cache/chunk_cache.py,sha256=MWo4DwRkHKOLKyow53YrGQdvn2dI7hVJuf2Gf-SPAr0,3110
576
579
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
577
- sglang/srt/mem_cache/hiradix_cache.py,sha256=Z_XgbLBxI8SN7kgsi1R6iX6AfgDUF5-ynVnBVRF7KUw,17061
578
- sglang/srt/mem_cache/memory_pool.py,sha256=soOE9l5qZlWHFbqIHwSEW4l7NQXzmEjpJKk1rfehQZc,37055
579
- sglang/srt/mem_cache/memory_pool_host.py,sha256=9N34Y7P0aM2xkmhsEw2_W8xN897h-HEwzRPIu1REwac,13186
580
+ sglang/srt/mem_cache/hiradix_cache.py,sha256=0jZEWFP4k5LkKMiCc-G0G9GD7oPoP9zNWaAFPMeVFSw,17137
581
+ sglang/srt/mem_cache/memory_pool.py,sha256=HkdgOYcGfp72el-847b_VOAsjymD9Cyr7BudHhIBoP8,39002
582
+ sglang/srt/mem_cache/memory_pool_host.py,sha256=G_vmKQFw6jvPnaxjltRLXueDUjlqYgIdxRqoM1kTj5Q,8863
580
583
  sglang/srt/mem_cache/multimodal_cache.py,sha256=wZl2KeEl3xeoEsYdH33UoM-FO8kqfLo_XUgereJVvoM,1348
581
- sglang/srt/mem_cache/radix_cache.py,sha256=ojr9_bUwnPocmpbGZXz8JKac4dS-PrfNYk8UqF4Gvi8,17936
584
+ sglang/srt/mem_cache/radix_cache.py,sha256=qZSsdlTVCZ3UCqkwe3IDYE07-QE72hYExIb2kbhyMmQ,18096
582
585
  sglang/srt/metrics/collector.py,sha256=C9QEJDOEdOPBwy2IJwFS3R6VbGzVzGs2xakKCCPvQDk,19903
583
586
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
584
587
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=3pc1_TpGmUAI51SxVubGGZIRuBmZrj8kOuch970RN2A,30505
@@ -597,7 +600,7 @@ sglang/srt/models/dbrx.py,sha256=4pn_fdoATg01VEqNnIAxNEsKV5XU7gwHyd289eydq1s,155
597
600
  sglang/srt/models/deepseek.py,sha256=ZnN02HdgXCB23Vno5V9UMUoOxH5HC82vNTwsVulUJ-o,17206
598
601
  sglang/srt/models/deepseek_janus_pro.py,sha256=OeeI7vZbE4HGpxa8CwT6-Lbfs7J7WMQ3oBNpVJQpv3w,70450
599
602
  sglang/srt/models/deepseek_nextn.py,sha256=47fehxRdiOizr0rdLg5f1fzQEx6gGAOcDcWKtblloyk,5928
600
- sglang/srt/models/deepseek_v2.py,sha256=L0ewZPnxM6xCCB8tJxswwNKAjE-q_SCloJfZx8Y8LoU,100660
603
+ sglang/srt/models/deepseek_v2.py,sha256=q0zFTMqeqp36ScxAOr_ukPOhQd43u_XeKTA0VLzumCg,101271
601
604
  sglang/srt/models/deepseek_vl2.py,sha256=F-nf2qRiiZNg9js__rYFEc9fNs_HVEfAi_Mh1jvqwkQ,13095
602
605
  sglang/srt/models/exaone.py,sha256=TpO-rtCpEZ8Ua7hGFnS8l2oAYhY0Pij50grc9WQ2mvc,13576
603
606
  sglang/srt/models/gemma.py,sha256=4cdrPISg1VKnsuI-QPTpYvet4BrX8BMKvCIN82iLskw,12641
@@ -639,7 +642,7 @@ sglang/srt/models/mistral.py,sha256=Ox07RJpTYl-4Pb25UIghL_66o9TTA6lPmsUyent_Flc,
639
642
  sglang/srt/models/mixtral.py,sha256=n-Pz7fyWE4m6Uh7sjzAYKNr5Wy4QUj5Yekl0qiCwQI8,17055
640
643
  sglang/srt/models/mixtral_quant.py,sha256=-kQw9r8KcLdO8SNN9RKXzrGq9Q2Al9l9cWHi1VrZSRM,15260
641
644
  sglang/srt/models/mllama.py,sha256=pkS0UWiqKJHRsFtSZzpSI9O6B5_j1OtI5pf_nyDPp3U,39707
642
- sglang/srt/models/mllama4.py,sha256=4E7qGicVuTKAs3vHF2SnoNlQP1mm2eBrsLU7QK3x9ow,10519
645
+ sglang/srt/models/mllama4.py,sha256=EJgmzUIVholvTSPIc1n4fbhZ3_CaXiNQpGzkhZs_BGw,20130
643
646
  sglang/srt/models/olmo.py,sha256=7-q_fA6XXdG7kPUjpUzYkzMUWJobuSjhqjYw9xSUs_c,12671
644
647
  sglang/srt/models/olmo2.py,sha256=azmljhJF4ivcQfUtfsAUxq3ducE4tRKTL6iwe0IKYMg,14327
645
648
  sglang/srt/models/olmoe.py,sha256=TMzt-yB891bvA4X50xL0NjNnFYSx9imlA7N1EG8KNK0,15949
@@ -667,7 +670,7 @@ sglang/srt/models/vila.py,sha256=Q8Z4q8DH3SAiaYocGys6AkEpw8zg3-l3pb5wLcyVves,978
667
670
  sglang/srt/models/xverse.py,sha256=DsNVI9JpzN4jj0Ry6aTrj7r-xq5YLOoDX2kH4YLJA-I,14035
668
671
  sglang/srt/models/xverse_moe.py,sha256=7KCM2-j12towDMNvXkuuYiBOmNauH6NG4Ip40x0khqA,16782
669
672
  sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
670
- sglang/srt/multimodal/mm_utils.py,sha256=7o4w1W0VAWgEquVIz7qw7-IxYbZY2g3MrXw9php90oY,12621
673
+ sglang/srt/multimodal/mm_utils.py,sha256=6bV1_a7Iet9W0y3qdd0E-hFM78zalIzNZCYbLzJX2y8,12640
671
674
  sglang/srt/multimodal/processors/base_processor.py,sha256=05moS0iDYUfI9CKzio5ip8cjsfUjqYi_S5fzArWJxw8,21539
672
675
  sglang/srt/multimodal/processors/clip.py,sha256=chKReTXD4ETUQ0s7BOk_AmWHEZXR5no6fprrnHwSNIM,1271
673
676
  sglang/srt/multimodal/processors/deepseek_vl_v2.py,sha256=VHrUbOTF_18fPYaEx_awS1UMhOy0nhTXoA17wakwyQ4,3281
@@ -679,7 +682,7 @@ sglang/srt/multimodal/processors/kimi_vl.py,sha256=2KMB6iEXizHXzE6yjtzS7V1RFhsA2
679
682
  sglang/srt/multimodal/processors/llava.py,sha256=tIjeDPYxvMWUMXXLcfzzuaLPvdBtPrRzM25QzP0cnaE,7962
680
683
  sglang/srt/multimodal/processors/minicpm.py,sha256=9Y8KCo5eCX274CJIx1D8BVWgferjc0Uf6Zd_HR_IMnY,5316
681
684
  sglang/srt/multimodal/processors/mlama.py,sha256=bW0hVPtRMqYz1RR2I19QtZLUTTPzInZv_6ZhVPJBieo,1406
682
- sglang/srt/multimodal/processors/mllama4.py,sha256=V2Y3G-bv2wdqkgE_XGPLMJp_nSEEr-MD8_S4h8QTDDk,5752
685
+ sglang/srt/multimodal/processors/mllama4.py,sha256=sekRM4DS7mBXpseRoaTlmnH-EjmQ9wFZCkp9q0_FfLE,5603
683
686
  sglang/srt/multimodal/processors/phi4mm.py,sha256=ZYH1fWTpbz6wLGyV6c51kd54vwPRm0qdZiQcc0GlXGI,2337
684
687
  sglang/srt/multimodal/processors/pixtral.py,sha256=VsCIFJpIUM3pSGNe_uRd8NlH7zSokW4xHOrZSi70R1o,4055
685
688
  sglang/srt/multimodal/processors/qwen_vl.py,sha256=lKVWvvtB4OWfyJ9YZLuJapiW93cGb4T3SoqSAsPc1S4,6874
@@ -716,6 +719,7 @@ sglang/test/test_block_fp8_deep_gemm_blackwell.py,sha256=Hnhq4kkyINHb4ONedkp5Kf7
716
719
  sglang/test/test_block_fp8_ep.py,sha256=Ufq6p32sb1RSbFKaSnsVF8RrqqwwAACBUgOJTEH2Lbc,10856
717
720
  sglang/test/test_custom_ops.py,sha256=2bSo9P5_rJZYFq8Y8IKRimDfFyZZGJluhL7Ngny0Pf4,5571
718
721
  sglang/test/test_cutlass_moe.py,sha256=beXdvpwpHh0O8fpLW-RV3Bx8Wu0lRFG_SNkL3Op5ITA,9905
722
+ sglang/test/test_cutlass_w4a8_moe.py,sha256=LmeSS7BrKO6-1mnksOqmo0nvJ_y3oeeiT5ytRkHk2A0,8780
719
723
  sglang/test/test_deepep_utils.py,sha256=749ysTBGNzh6rYUCJhhZBtZpeD15eWTeNHYCytcvZtc,7448
720
724
  sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
721
725
  sglang/test/test_fp4_moe.py,sha256=U4LHzpGl6pCiCZiENRPxVP6hxqkqRZ_0WXbB29vYeAo,8187
@@ -726,8 +730,8 @@ sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
726
730
  sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqngBmRWXFmkl5NnuK1GM,13914
727
731
  sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
728
732
  sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
729
- sglang-0.4.9.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
730
- sglang-0.4.9.dist-info/METADATA,sha256=tH3lSIOqvezltkU9P2f6oqUauU_5S8qnLr3jYUIRiV0,27125
731
- sglang-0.4.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
732
- sglang-0.4.9.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
733
- sglang-0.4.9.dist-info/RECORD,,
733
+ sglang-0.4.9.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
734
+ sglang-0.4.9.post1.dist-info/METADATA,sha256=FpRJrvi06EAhGk6RNzufoaKNu-D0u_xFKAzv1pjRmv8,27220
735
+ sglang-0.4.9.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
736
+ sglang-0.4.9.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
737
+ sglang-0.4.9.post1.dist-info/RECORD,,