sglang 0.4.9__py3-none-any.whl → 0.4.9.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_serving.py +2 -2
- sglang/srt/configs/model_config.py +12 -1
- sglang/srt/conversation.py +35 -1
- sglang/srt/disaggregation/mooncake/conn.py +35 -4
- sglang/srt/entrypoints/http_server_engine.py +1 -1
- sglang/srt/layers/communicator.py +3 -1
- sglang/srt/layers/flashinfer_comm_fusion.py +3 -3
- sglang/srt/layers/layernorm.py +2 -2
- sglang/srt/layers/moe/cutlass_w4a8_moe.py +215 -0
- sglang/srt/layers/moe/ep_moe/kernels.py +58 -0
- sglang/srt/layers/moe/ep_moe/layer.py +140 -2
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +2 -0
- sglang/srt/layers/moe/fused_moe_triton/layer.py +135 -58
- sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +176 -0
- sglang/srt/layers/quantization/__init__.py +2 -0
- sglang/srt/layers/quantization/fp8.py +28 -7
- sglang/srt/layers/quantization/modelopt_quant.py +244 -1
- sglang/srt/layers/quantization/w4afp8.py +264 -0
- sglang/srt/layers/vocab_parallel_embedding.py +9 -3
- sglang/srt/lora/triton_ops/gate_up_lora_b.py +30 -19
- sglang/srt/lora/triton_ops/qkv_lora_b.py +30 -19
- sglang/srt/lora/triton_ops/sgemm_lora_a.py +27 -11
- sglang/srt/lora/triton_ops/sgemm_lora_b.py +27 -15
- sglang/srt/managers/cache_controller.py +41 -195
- sglang/srt/managers/io_struct.py +8 -1
- sglang/srt/managers/mm_utils.py +4 -2
- sglang/srt/managers/schedule_batch.py +1 -1
- sglang/srt/managers/scheduler.py +17 -5
- sglang/srt/mem_cache/hiradix_cache.py +2 -0
- sglang/srt/mem_cache/memory_pool.py +113 -63
- sglang/srt/mem_cache/memory_pool_host.py +6 -109
- sglang/srt/mem_cache/radix_cache.py +8 -4
- sglang/srt/models/deepseek_v2.py +16 -2
- sglang/srt/models/mllama4.py +360 -79
- sglang/srt/multimodal/mm_utils.py +2 -2
- sglang/srt/multimodal/processors/mllama4.py +62 -60
- sglang/srt/server_args.py +15 -0
- sglang/srt/two_batch_overlap.py +3 -0
- sglang/srt/utils.py +37 -17
- sglang/test/test_cutlass_w4a8_moe.py +281 -0
- sglang/utils.py +5 -5
- sglang/version.py +1 -1
- {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/METADATA +4 -3
- {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/RECORD +47 -43
- {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/WHEEL +0 -0
- {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.9.dist-info → sglang-0.4.9.post1.dist-info}/top_level.txt +0 -0
@@ -3,14 +3,14 @@ sglang/api.py,sha256=rcp3GeoyZhmJ0GDLPRkuZNcxd0TBJy_wfUDpcmQoqW8,7210
|
|
3
3
|
sglang/bench_offline_throughput.py,sha256=TwgXZYmwPaHVsdPtNU9LO0p1tr5OOKLy9wYgrfGAlFU,14056
|
4
4
|
sglang/bench_one_batch.py,sha256=RJsXQ7pvq8y4yzW7svURtWZF5RiawxEMrsjV0OcKsrk,19388
|
5
5
|
sglang/bench_one_batch_server.py,sha256=LS1BRmFwP67IpYlU5wmkWjqquiA5drvtWe-fwHnFSKI,14170
|
6
|
-
sglang/bench_serving.py,sha256=
|
6
|
+
sglang/bench_serving.py,sha256=nOjDnqOKDezDvtCvEhC1_FAoWVkir38bSaEIcSGGVZg,71748
|
7
7
|
sglang/check_env.py,sha256=qDMIG2rCNBH1yKnxQmF-Bp10oiFMUKMgfZLHZYOmdSY,8412
|
8
8
|
sglang/compile_deep_gemm.py,sha256=H118s76CKdpZr-cDeFfBCePe7--c_teEBNVIzchYVSo,6243
|
9
9
|
sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
|
10
10
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
11
11
|
sglang/profiler.py,sha256=tEHzHerXC-ymk4OrkoUcMbgcGHmb8VESthsNSP2Yx9w,4417
|
12
|
-
sglang/utils.py,sha256=
|
13
|
-
sglang/version.py,sha256=
|
12
|
+
sglang/utils.py,sha256=leKs-YyX1_Jk216yoKX-KRYx_EbLiuemsHbD31xSjMw,16445
|
13
|
+
sglang/version.py,sha256=zFQM77wgAmWBTKJkmd0BFXV3JGA69nyLLIJtFnFJo6Q,28
|
14
14
|
sglang/eval/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
15
15
|
sglang/eval/loogle_eval.py,sha256=pRPVA4fxGmT3_oXvXnlNE-UlPrcQGLBJF-OSE9YWJXM,4336
|
16
16
|
sglang/lang/chat_template.py,sha256=HKlx7snSWFED8GKF5ex79sQrPWFw5TSXQM0_LsiD9Bc,20552
|
@@ -30,7 +30,7 @@ sglang/srt/_custom_ops.py,sha256=0lJRMTKTjoxJPh1qQnnMY02Z3SyBDi7LJI34IBLQsgQ,446
|
|
30
30
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
31
31
|
sglang/srt/code_completion_parser.py,sha256=KFa95OU0TeVjJkOqIgS4xV3kaJ5dFWOmAAgISyc1oEc,3803
|
32
32
|
sglang/srt/constants.py,sha256=0i-tEwG2BSYNDy96MxnGHV5HnBELkYcnsVGsE-R18o0,93
|
33
|
-
sglang/srt/conversation.py,sha256=
|
33
|
+
sglang/srt/conversation.py,sha256=tu6MggZEhA4e3JJrgvXr44f3W6euDubLLnO5LfFbY6o,39588
|
34
34
|
sglang/srt/custom_op.py,sha256=87r2PIgiGLREsIZQ8qsUD-zgI66_54y9GrE0buXzoCI,3076
|
35
35
|
sglang/srt/debug_utils.py,sha256=slaFOY4BYDBFatkfu8FZlzai-u4LFS-5GUzdr-t50zE,2241
|
36
36
|
sglang/srt/hf_transformers_utils.py,sha256=1Ku6POAyk1Hb4Q1VcHTE_2EjujVhPerPO9V9NAcHbOI,11997
|
@@ -40,10 +40,10 @@ sglang/srt/operations.py,sha256=ddQ8KO63L73OciaR8MZ9h2h83gKVY4-WuWgeEGowPJA,5346
|
|
40
40
|
sglang/srt/operations_strategy.py,sha256=Pwd2sKeRtKh9WJXgzlNr2tU9y6YMcI3MDLbatHqlMws,7145
|
41
41
|
sglang/srt/patch_torch.py,sha256=OUPCGQSQz3MVZB1zZ_Eq8lXiw0uIKJ_HWjqQolI8FsM,3088
|
42
42
|
sglang/srt/reasoning_parser.py,sha256=DkZrFhkJ9qe7w46fTnM9sqhXwTDgZxD4qPdGka-Dly8,7579
|
43
|
-
sglang/srt/server_args.py,sha256=
|
43
|
+
sglang/srt/server_args.py,sha256=DQO3e_Dxi3A3ZPApBtDeAulirZ15wapNa4y-HuOUxIM,75997
|
44
44
|
sglang/srt/torch_memory_saver_adapter.py,sha256=K_eTx0UU84MHSTXI3iqYLdHV4IWtJMJ2FKdGFJR8v1E,2417
|
45
|
-
sglang/srt/two_batch_overlap.py,sha256=
|
46
|
-
sglang/srt/utils.py,sha256=
|
45
|
+
sglang/srt/two_batch_overlap.py,sha256=Pqqj4BzVbXCiOvWE20MGMo21AoAmEPd0R32DsPfD5c8,28562
|
46
|
+
sglang/srt/utils.py,sha256=CMLibLUA95EYzGbHKj53jBnC3tpIO6eKLT1RLd__4bU,86841
|
47
47
|
sglang/srt/warmup.py,sha256=zldxhMlXpclRAJXmfBjJNUJd1eDizVdysibBvQyTVuA,1782
|
48
48
|
sglang/srt/configs/__init__.py,sha256=8EcVRP95epZ49DxBa6LgKWt7eO3Qe7Hrr3V1c6HkMnY,553
|
49
49
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
@@ -56,7 +56,7 @@ sglang/srt/configs/janus_pro.py,sha256=95qLFQ43n-q5MVEDnh9OBXCUNXOCofwBKYvd7LKPv
|
|
56
56
|
sglang/srt/configs/kimi_vl.py,sha256=4W7VQI3pr888ZsFA2SqCQo4mI0seXTOrGQ-x3oTvWew,1358
|
57
57
|
sglang/srt/configs/kimi_vl_moonvit.py,sha256=hx2Rt4JSFbvy2HUTeLjBpge87m8M6ITAhqsgdNf_Jd4,1163
|
58
58
|
sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
|
59
|
-
sglang/srt/configs/model_config.py,sha256=
|
59
|
+
sglang/srt/configs/model_config.py,sha256=7PWC4qdUxKjOo47It1kNhaEQQwsd3CYWYvcv-oP0NXE,27517
|
60
60
|
sglang/srt/configs/update_config.py,sha256=2EpDVocEpMv35g1V-iPLSaLjBgylC5vN0yYSlW49k5w,4664
|
61
61
|
sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
|
62
62
|
sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
|
@@ -89,7 +89,7 @@ sglang/srt/disaggregation/common/utils.py,sha256=SxRhAWisNK8seGhb5BXBJ5u53DF7yeK
|
|
89
89
|
sglang/srt/disaggregation/fake/__init__.py,sha256=jJGWdXwaQiGIoR6atKqkQfkJmVyQ09l55VUN2WjwaeY,77
|
90
90
|
sglang/srt/disaggregation/fake/conn.py,sha256=oD1DArn1yDFZCu-X6p93uSLlAXEkt9lYxERICMznxGw,2286
|
91
91
|
sglang/srt/disaggregation/mooncake/__init__.py,sha256=0TgqkAdQI1YynbHY6c0QISvVoOSk-0SwCIq5rjPSmgE,156
|
92
|
-
sglang/srt/disaggregation/mooncake/conn.py,sha256=
|
92
|
+
sglang/srt/disaggregation/mooncake/conn.py,sha256=EDINLcY3AoYihlu4ZMe__AJuGxCepaNMggU35-RENeY,60983
|
93
93
|
sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=JYB9T-EPdJNfv4I_sVpmMOZCOJ14itD97ws6tTvj240,4281
|
94
94
|
sglang/srt/disaggregation/nixl/__init__.py,sha256=qODVPIGWUXKXq4zsRIcMYoAoAeg6nBIN9vdQOlVMANE,136
|
95
95
|
sglang/srt/disaggregation/nixl/conn.py,sha256=cXqWU2Gbi6E2LS0MznEcgFCrwONhYxBtD-zE9DU32z0,20333
|
@@ -110,7 +110,7 @@ sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132Bv
|
|
110
110
|
sglang/srt/entrypoints/EngineBase.py,sha256=yKN76witT2jz1zhmLHmPNLGMpK2UiOTaKQ2KPD8l99U,2594
|
111
111
|
sglang/srt/entrypoints/engine.py,sha256=o6sccP-gdeO6rOY9vbSqVmUNlBrK2YsinrDf42URg5A,31328
|
112
112
|
sglang/srt/entrypoints/http_server.py,sha256=7q6TK02vkppIrW_oa_Xxhr-EV3SNaAwAt_pcnLNfC8w,37503
|
113
|
-
sglang/srt/entrypoints/http_server_engine.py,sha256=
|
113
|
+
sglang/srt/entrypoints/http_server_engine.py,sha256=_--j4U04OeJLlnnv1f0XmCd_Ry0z1FlhkrbePX8rYV0,4938
|
114
114
|
sglang/srt/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
115
115
|
sglang/srt/entrypoints/openai/protocol.py,sha256=qIwhokAlAVbp7Nin1eE4TCnUyrAhKcPS47kzHkhzZg4,18799
|
116
116
|
sglang/srt/entrypoints/openai/serving_base.py,sha256=5NJ2S_6B2NFSwn4nLp6eaeJ5iC3IcQzMEY9lW_gPcdA,5246
|
@@ -144,11 +144,11 @@ sglang/srt/function_call/qwen25_detector.py,sha256=9JfZem_5nw91Og2biwq8eIpUQjy_3
|
|
144
144
|
sglang/srt/function_call/utils.py,sha256=__ImDF2kNyoLWsYO5RYoryvy1mmgEjnjXlCvLv-uLCM,1695
|
145
145
|
sglang/srt/layers/activation.py,sha256=UDrJcnQoIOJjMzZ9XAy5JlEJ-do44tGMtcitY7I8kPQ,7327
|
146
146
|
sglang/srt/layers/amx_utils.py,sha256=1mENgHK2B8mgaD1oMtgbZ15Jmy_Uu1QueBmo09Ff2iA,2865
|
147
|
-
sglang/srt/layers/communicator.py,sha256=
|
147
|
+
sglang/srt/layers/communicator.py,sha256=eDa24tQMds3YTd1Xh4BqFpLBHdw9iF23aaoctiygOuc,19546
|
148
148
|
sglang/srt/layers/dp_attention.py,sha256=V7m_PfvHHtRMJMviIqmoyqf5VFFJLYYcynDuI5dXtAU,10275
|
149
149
|
sglang/srt/layers/elementwise.py,sha256=MyQUflyKEfPZ-BggW1Kd4hB53RFD6FXGc2S5LXjx_do,16026
|
150
|
-
sglang/srt/layers/flashinfer_comm_fusion.py,sha256=
|
151
|
-
sglang/srt/layers/layernorm.py,sha256=
|
150
|
+
sglang/srt/layers/flashinfer_comm_fusion.py,sha256=fkTcAB7qYwSWi95qI3Rqq0JUyDpJdcYkd4TYkWO01HI,5891
|
151
|
+
sglang/srt/layers/layernorm.py,sha256=ooqA-t-vY5erbKBwqnOotfDsJRTygP5E10CfzEAVF6M,8657
|
152
152
|
sglang/srt/layers/linear.py,sha256=cxth3pvLGdud-ZoHrGFhrPeuhLnBX4IWohyBr2gCCv0,54890
|
153
153
|
sglang/srt/layers/logits_processor.py,sha256=tOPMYopEQuoLBzwbjSmU9OWFYh2mmy3ViJSbA_wpFOU,25619
|
154
154
|
sglang/srt/layers/multimodal.py,sha256=YVR69WW-2aGDcZHT8IVJ6F_LRM7wraZr8VjrPDXqDmA,2104
|
@@ -159,7 +159,7 @@ sglang/srt/layers/rotary_embedding.py,sha256=EhxI0E8jcTWZ2COpnku7crbW8Hew5fe_ujM
|
|
159
159
|
sglang/srt/layers/sampler.py,sha256=xNds1migup2s6b9_pS6ljkJUkvNtv7nmTGeIdOzoQ6w,11182
|
160
160
|
sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
|
161
161
|
sglang/srt/layers/utils.py,sha256=IWGg1Hb7c33Z3LHRPVJyUAzp3BnSid23ZWXAmJ_Jvp8,1204
|
162
|
-
sglang/srt/layers/vocab_parallel_embedding.py,sha256=
|
162
|
+
sglang/srt/layers/vocab_parallel_embedding.py,sha256=i1Uoo8TE877jLNjAjwoyMD36W7J3WwFD402YWq0qTT0,23560
|
163
163
|
sglang/srt/layers/attention/aiter_backend.py,sha256=7sEUgViw-xl3yok91yyOD9gTi8lQmME0g0ZiKVTCcyI,32851
|
164
164
|
sglang/srt/layers/attention/ascend_backend.py,sha256=jPCsU9_gH1iZNoZHD9nCeDdVdXqBt31LI65N55BTJPg,8250
|
165
165
|
sglang/srt/layers/attention/base_attn_backend.py,sha256=KXVcCguwXh-PSrY9Y2aUrlXXUhWdbVxqVEF2_xIMvm4,3466
|
@@ -184,16 +184,18 @@ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=waZsmpKIp8rTg
|
|
184
184
|
sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
|
185
185
|
sglang/srt/layers/moe/cutlass_moe.py,sha256=--bNTA2BGbHeULb_XXDoRUyWWbE-doHo5K5k1T0N0WA,14323
|
186
186
|
sglang/srt/layers/moe/cutlass_moe_params.py,sha256=9NRCmgP_Ug3gGqCcpi-x-QRbLjCNpw8792gKXwZsbEU,6522
|
187
|
+
sglang/srt/layers/moe/cutlass_w4a8_moe.py,sha256=bc8s5Oc36pCF0VozxZuqFOOU2fov-4wGPlyduye3Imk,7296
|
187
188
|
sglang/srt/layers/moe/fused_moe_native.py,sha256=bW3KWxxz9rxKMUQqfmAtF-7ptTODA1pwLydE05ABDJE,5030
|
188
189
|
sglang/srt/layers/moe/router.py,sha256=UrPieRvemN7Ew48gtG7DA2xhNDBRSnZxzugTEBI-0_E,12006
|
189
190
|
sglang/srt/layers/moe/topk.py,sha256=DhG8EIqyhn41erCKRwX2wGrYOOF-1VdBrCZwyyc1Ks0,18048
|
190
191
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
191
|
-
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=
|
192
|
-
sglang/srt/layers/moe/ep_moe/layer.py,sha256=
|
192
|
+
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=FhQAv9JQncMy8luzULWbJtH3MxCAf3tHUQU-mAJlPlM,42543
|
193
|
+
sglang/srt/layers/moe/ep_moe/layer.py,sha256=hqeNZjqx6aq2XGTZLkR9j9ITwdetr1mI85vWPo_1xSo,62480
|
193
194
|
sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=aiBE3mjvWV3eBrFGH9J44tuJncQwOjRS_XeyBNCEtqM,24379
|
194
195
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
|
195
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
196
|
-
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=
|
196
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=Ffcu3Jvp7-nllybh5EWtG0MamWI9PqO8a6ESiMKWEB0,63292
|
197
|
+
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=o6WDXu6maDZhVGu75Qh0nVkPgrMLgJvLp7npyCHPjUg,37357
|
198
|
+
sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py,sha256=hoWVdf8ry8IDGH2bfk2WW-y2S5h5haLTGanBSwkkeE0,5848
|
197
199
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
198
200
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
|
199
201
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
|
@@ -356,22 +358,23 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=wsNUtzqE2Emi0RAwVzpFHUOVO
|
|
356
358
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LNx1W_BsWZcpHomiScCRap46dV0-F7S_w3Htskoqlm8,3263
|
357
359
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=fo1akCuGoqcGwQgfh56hApgg-wLXfo9kHHksE_6m1F4,3262
|
358
360
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=a9XJZ0XudUTebAdN5guVq-3XSvZetHIRjatO1-_0xCk,3257
|
359
|
-
sglang/srt/layers/quantization/__init__.py,sha256=
|
361
|
+
sglang/srt/layers/quantization/__init__.py,sha256=JHV4X982TW7_U7ZobQwNIdK-Z5saEH-R1xKWrF6jgRg,12650
|
360
362
|
sglang/srt/layers/quantization/awq.py,sha256=KemDG55U3B6YZVjMV71awVAIj0islFvtxcUHmOBeGy0,6739
|
361
363
|
sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScLFjB5_sywFlAfE5J0,4776
|
362
364
|
sglang/srt/layers/quantization/blockwise_int8.py,sha256=vWyPZsRLhdKtSmjvlT5fsowBK_nEebYbDAUh2yqseGw,15285
|
363
|
-
sglang/srt/layers/quantization/fp8.py,sha256=
|
365
|
+
sglang/srt/layers/quantization/fp8.py,sha256=X97n8M7Kqhv98JvVQsSW6oYJtI2Wwrnoix-LMtsaxu4,47468
|
364
366
|
sglang/srt/layers/quantization/fp8_kernel.py,sha256=vziHnk-gVbeY3z94216UDCU5kyev36ZrKldMgglSmpw,34718
|
365
367
|
sglang/srt/layers/quantization/fp8_utils.py,sha256=fsX1x17RwcK88N1WpF-YELQOpkpHvJnoLriZj27H2R8,25611
|
366
368
|
sglang/srt/layers/quantization/gptq.py,sha256=B0J14a-OANM2uglnatukFMlvpDnUprV3-p_l4308WUQ,26838
|
367
369
|
sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRC9FOn9exNvK4QHbUeBj3Hhv32VcyGphapFPt5b84,12625
|
368
370
|
sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
|
369
371
|
sglang/srt/layers/quantization/kv_cache.py,sha256=_9pF5rwvB7ta6Gdc5YKVVGbNzYwqmhIx4TrX1-xnodQ,3261
|
370
|
-
sglang/srt/layers/quantization/modelopt_quant.py,sha256=
|
372
|
+
sglang/srt/layers/quantization/modelopt_quant.py,sha256=0abOZfOyGYOQFiyMssZPey1YekQrAQBcMp-GQlrFXZY,40746
|
371
373
|
sglang/srt/layers/quantization/moe_wna16.py,sha256=9w3TYF1aQ0N0zNBgcuyOX-z-zfq4it2ul_MjKk_fPCA,19680
|
372
374
|
sglang/srt/layers/quantization/qoq.py,sha256=adhsCixRUTh1m8sfpRWNhh5oX3I1WJli3Elg8uexP6A,8222
|
373
375
|
sglang/srt/layers/quantization/quant_utils.py,sha256=kBibCBK9DCambuPy2atB82Bmalb-Vs_wbQoVHT7h2Lw,5089
|
374
376
|
sglang/srt/layers/quantization/utils.py,sha256=qHdWbLQD8teKhv2tOcuyGegpFonXygoJHwCAD7Ur5MA,5439
|
377
|
+
sglang/srt/layers/quantization/w4afp8.py,sha256=Rf8wMqtGUgLF8soOM16owfFI0AR1q1Ylzr5YpcdqyxU,9538
|
375
378
|
sglang/srt/layers/quantization/w8a8_fp8.py,sha256=ZB6ydquyPOORDe9OCoEBRH6dsQhCeRWv1YIgm8UDQwQ,11622
|
376
379
|
sglang/srt/layers/quantization/w8a8_int8.py,sha256=aPMm-sk5nBpHwHzkedvuAWXiqWpp_NPyVaPHxHnuhVg,10847
|
377
380
|
sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -548,20 +551,20 @@ sglang/srt/lora/backend/base_backend.py,sha256=EIz8I-GIrdmK4fISw3ENhbJVVITaxKfyL
|
|
548
551
|
sglang/srt/lora/backend/flashinfer_backend.py,sha256=el6IAB4kTgDTbwCggmqFuukliyoapN5X6FLksG-4wJ8,4151
|
549
552
|
sglang/srt/lora/backend/triton_backend.py,sha256=uqwBGlguXX8EkCKjSPqac6SPTgcKA31u3u6HkKPQcos,2530
|
550
553
|
sglang/srt/lora/triton_ops/__init__.py,sha256=JGOYPIn1XbGcyJTbt8A0qoc02PYONSGNNjGkC8yJpAM,283
|
551
|
-
sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=
|
552
|
-
sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=
|
553
|
-
sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=
|
554
|
-
sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=
|
555
|
-
sglang/srt/managers/cache_controller.py,sha256=
|
554
|
+
sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=QURCYxHNR8Ls4SQtt3dvdgjvdDVhywI9tOzsK8SV9m8,5779
|
555
|
+
sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=iz5scFNo2YFTeiM8beWg9Z1oZI-6AM_T1wBMCQ6qp2Q,6485
|
556
|
+
sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=Ksova04wgeGsFqGOXWqJtMYaHgyUYcx8VU42BZQOkVA,5129
|
557
|
+
sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=VqCAFvUtq_l-0RGIkx3W_fzD55QcW2FAcqpvSHOBFp4,5115
|
558
|
+
sglang/srt/managers/cache_controller.py,sha256=HPQGLwjey6GeW60Vu6MVeLYtRsJrh99_-dqHYSzlBkE,13614
|
556
559
|
sglang/srt/managers/configure_logging.py,sha256=8sNXZ2z9pBWOwn-X3wyz013Ob8Nbm1zDxRkxoZjH-l4,1633
|
557
560
|
sglang/srt/managers/data_parallel_controller.py,sha256=PZ-wOcAFn3PQqMB6I2vjIsFmplf0nlNl5hqTDKTHTG8,12112
|
558
561
|
sglang/srt/managers/detokenizer_manager.py,sha256=SpLxTsSPKBZfD-ZMhJ5zpPPGuUb8PmcYgFSL9CsurU4,10696
|
559
|
-
sglang/srt/managers/io_struct.py,sha256=
|
560
|
-
sglang/srt/managers/mm_utils.py,sha256=
|
562
|
+
sglang/srt/managers/io_struct.py,sha256=gGyTIY3-OhUg2UgkvSxYWiDcc2S9FMJsDJuj182fTNY,35534
|
563
|
+
sglang/srt/managers/mm_utils.py,sha256=Dqrk5wG3TDVBz4eZV77L9DmUZFjbz0TqF5ASyMhF2lo,26783
|
561
564
|
sglang/srt/managers/multimodal_processor.py,sha256=mzCrN-8H0bE0iMO8UzxmYmhE2M1qsbVJXGdhAYcjjYA,2016
|
562
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
565
|
+
sglang/srt/managers/schedule_batch.py,sha256=FdNt7oe9Y-Akfwk4yJ3jNDhd8i1-aO5bHkC0-3IvESU,73114
|
563
566
|
sglang/srt/managers/schedule_policy.py,sha256=0T8URzQmLvEmG-42-SFBBl9WnsOSwYO8-_CcBpuD38M,20474
|
564
|
-
sglang/srt/managers/scheduler.py,sha256=
|
567
|
+
sglang/srt/managers/scheduler.py,sha256=w0U6_NTGiq81wQ1cd_SwcrkeloxARKnqwufdWe61k-4,112727
|
565
568
|
sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=XAK2aeLleZBaLtzebEKdpaz9E7uKnHV7ywHSk_WPDYo,30148
|
566
569
|
sglang/srt/managers/session_controller.py,sha256=dzlMNZlo20FTSl64QqK7y7pElsdCy8ICOWWBPTBVwgs,6040
|
567
570
|
sglang/srt/managers/template_manager.py,sha256=RrwRA2oqId_PMQ98qJQGwIxMroOxiorl2sGC9ARou_0,8543
|
@@ -574,11 +577,11 @@ sglang/srt/mem_cache/allocator.py,sha256=MJUPQt4ECj4ReGeUVGVUNfwrwjHBbf3youdkyp3
|
|
574
577
|
sglang/srt/mem_cache/base_prefix_cache.py,sha256=XHSzXKgBin-m1HsL47K-GobhLnajaGxqLqGtrLsWaZ0,2540
|
575
578
|
sglang/srt/mem_cache/chunk_cache.py,sha256=MWo4DwRkHKOLKyow53YrGQdvn2dI7hVJuf2Gf-SPAr0,3110
|
576
579
|
sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
|
577
|
-
sglang/srt/mem_cache/hiradix_cache.py,sha256=
|
578
|
-
sglang/srt/mem_cache/memory_pool.py,sha256=
|
579
|
-
sglang/srt/mem_cache/memory_pool_host.py,sha256=
|
580
|
+
sglang/srt/mem_cache/hiradix_cache.py,sha256=0jZEWFP4k5LkKMiCc-G0G9GD7oPoP9zNWaAFPMeVFSw,17137
|
581
|
+
sglang/srt/mem_cache/memory_pool.py,sha256=HkdgOYcGfp72el-847b_VOAsjymD9Cyr7BudHhIBoP8,39002
|
582
|
+
sglang/srt/mem_cache/memory_pool_host.py,sha256=G_vmKQFw6jvPnaxjltRLXueDUjlqYgIdxRqoM1kTj5Q,8863
|
580
583
|
sglang/srt/mem_cache/multimodal_cache.py,sha256=wZl2KeEl3xeoEsYdH33UoM-FO8kqfLo_XUgereJVvoM,1348
|
581
|
-
sglang/srt/mem_cache/radix_cache.py,sha256=
|
584
|
+
sglang/srt/mem_cache/radix_cache.py,sha256=qZSsdlTVCZ3UCqkwe3IDYE07-QE72hYExIb2kbhyMmQ,18096
|
582
585
|
sglang/srt/metrics/collector.py,sha256=C9QEJDOEdOPBwy2IJwFS3R6VbGzVzGs2xakKCCPvQDk,19903
|
583
586
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
584
587
|
sglang/srt/model_executor/cuda_graph_runner.py,sha256=3pc1_TpGmUAI51SxVubGGZIRuBmZrj8kOuch970RN2A,30505
|
@@ -597,7 +600,7 @@ sglang/srt/models/dbrx.py,sha256=4pn_fdoATg01VEqNnIAxNEsKV5XU7gwHyd289eydq1s,155
|
|
597
600
|
sglang/srt/models/deepseek.py,sha256=ZnN02HdgXCB23Vno5V9UMUoOxH5HC82vNTwsVulUJ-o,17206
|
598
601
|
sglang/srt/models/deepseek_janus_pro.py,sha256=OeeI7vZbE4HGpxa8CwT6-Lbfs7J7WMQ3oBNpVJQpv3w,70450
|
599
602
|
sglang/srt/models/deepseek_nextn.py,sha256=47fehxRdiOizr0rdLg5f1fzQEx6gGAOcDcWKtblloyk,5928
|
600
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
603
|
+
sglang/srt/models/deepseek_v2.py,sha256=q0zFTMqeqp36ScxAOr_ukPOhQd43u_XeKTA0VLzumCg,101271
|
601
604
|
sglang/srt/models/deepseek_vl2.py,sha256=F-nf2qRiiZNg9js__rYFEc9fNs_HVEfAi_Mh1jvqwkQ,13095
|
602
605
|
sglang/srt/models/exaone.py,sha256=TpO-rtCpEZ8Ua7hGFnS8l2oAYhY0Pij50grc9WQ2mvc,13576
|
603
606
|
sglang/srt/models/gemma.py,sha256=4cdrPISg1VKnsuI-QPTpYvet4BrX8BMKvCIN82iLskw,12641
|
@@ -639,7 +642,7 @@ sglang/srt/models/mistral.py,sha256=Ox07RJpTYl-4Pb25UIghL_66o9TTA6lPmsUyent_Flc,
|
|
639
642
|
sglang/srt/models/mixtral.py,sha256=n-Pz7fyWE4m6Uh7sjzAYKNr5Wy4QUj5Yekl0qiCwQI8,17055
|
640
643
|
sglang/srt/models/mixtral_quant.py,sha256=-kQw9r8KcLdO8SNN9RKXzrGq9Q2Al9l9cWHi1VrZSRM,15260
|
641
644
|
sglang/srt/models/mllama.py,sha256=pkS0UWiqKJHRsFtSZzpSI9O6B5_j1OtI5pf_nyDPp3U,39707
|
642
|
-
sglang/srt/models/mllama4.py,sha256=
|
645
|
+
sglang/srt/models/mllama4.py,sha256=EJgmzUIVholvTSPIc1n4fbhZ3_CaXiNQpGzkhZs_BGw,20130
|
643
646
|
sglang/srt/models/olmo.py,sha256=7-q_fA6XXdG7kPUjpUzYkzMUWJobuSjhqjYw9xSUs_c,12671
|
644
647
|
sglang/srt/models/olmo2.py,sha256=azmljhJF4ivcQfUtfsAUxq3ducE4tRKTL6iwe0IKYMg,14327
|
645
648
|
sglang/srt/models/olmoe.py,sha256=TMzt-yB891bvA4X50xL0NjNnFYSx9imlA7N1EG8KNK0,15949
|
@@ -667,7 +670,7 @@ sglang/srt/models/vila.py,sha256=Q8Z4q8DH3SAiaYocGys6AkEpw8zg3-l3pb5wLcyVves,978
|
|
667
670
|
sglang/srt/models/xverse.py,sha256=DsNVI9JpzN4jj0Ry6aTrj7r-xq5YLOoDX2kH4YLJA-I,14035
|
668
671
|
sglang/srt/models/xverse_moe.py,sha256=7KCM2-j12towDMNvXkuuYiBOmNauH6NG4Ip40x0khqA,16782
|
669
672
|
sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
|
670
|
-
sglang/srt/multimodal/mm_utils.py,sha256=
|
673
|
+
sglang/srt/multimodal/mm_utils.py,sha256=6bV1_a7Iet9W0y3qdd0E-hFM78zalIzNZCYbLzJX2y8,12640
|
671
674
|
sglang/srt/multimodal/processors/base_processor.py,sha256=05moS0iDYUfI9CKzio5ip8cjsfUjqYi_S5fzArWJxw8,21539
|
672
675
|
sglang/srt/multimodal/processors/clip.py,sha256=chKReTXD4ETUQ0s7BOk_AmWHEZXR5no6fprrnHwSNIM,1271
|
673
676
|
sglang/srt/multimodal/processors/deepseek_vl_v2.py,sha256=VHrUbOTF_18fPYaEx_awS1UMhOy0nhTXoA17wakwyQ4,3281
|
@@ -679,7 +682,7 @@ sglang/srt/multimodal/processors/kimi_vl.py,sha256=2KMB6iEXizHXzE6yjtzS7V1RFhsA2
|
|
679
682
|
sglang/srt/multimodal/processors/llava.py,sha256=tIjeDPYxvMWUMXXLcfzzuaLPvdBtPrRzM25QzP0cnaE,7962
|
680
683
|
sglang/srt/multimodal/processors/minicpm.py,sha256=9Y8KCo5eCX274CJIx1D8BVWgferjc0Uf6Zd_HR_IMnY,5316
|
681
684
|
sglang/srt/multimodal/processors/mlama.py,sha256=bW0hVPtRMqYz1RR2I19QtZLUTTPzInZv_6ZhVPJBieo,1406
|
682
|
-
sglang/srt/multimodal/processors/mllama4.py,sha256=
|
685
|
+
sglang/srt/multimodal/processors/mllama4.py,sha256=sekRM4DS7mBXpseRoaTlmnH-EjmQ9wFZCkp9q0_FfLE,5603
|
683
686
|
sglang/srt/multimodal/processors/phi4mm.py,sha256=ZYH1fWTpbz6wLGyV6c51kd54vwPRm0qdZiQcc0GlXGI,2337
|
684
687
|
sglang/srt/multimodal/processors/pixtral.py,sha256=VsCIFJpIUM3pSGNe_uRd8NlH7zSokW4xHOrZSi70R1o,4055
|
685
688
|
sglang/srt/multimodal/processors/qwen_vl.py,sha256=lKVWvvtB4OWfyJ9YZLuJapiW93cGb4T3SoqSAsPc1S4,6874
|
@@ -716,6 +719,7 @@ sglang/test/test_block_fp8_deep_gemm_blackwell.py,sha256=Hnhq4kkyINHb4ONedkp5Kf7
|
|
716
719
|
sglang/test/test_block_fp8_ep.py,sha256=Ufq6p32sb1RSbFKaSnsVF8RrqqwwAACBUgOJTEH2Lbc,10856
|
717
720
|
sglang/test/test_custom_ops.py,sha256=2bSo9P5_rJZYFq8Y8IKRimDfFyZZGJluhL7Ngny0Pf4,5571
|
718
721
|
sglang/test/test_cutlass_moe.py,sha256=beXdvpwpHh0O8fpLW-RV3Bx8Wu0lRFG_SNkL3Op5ITA,9905
|
722
|
+
sglang/test/test_cutlass_w4a8_moe.py,sha256=LmeSS7BrKO6-1mnksOqmo0nvJ_y3oeeiT5ytRkHk2A0,8780
|
719
723
|
sglang/test/test_deepep_utils.py,sha256=749ysTBGNzh6rYUCJhhZBtZpeD15eWTeNHYCytcvZtc,7448
|
720
724
|
sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
|
721
725
|
sglang/test/test_fp4_moe.py,sha256=U4LHzpGl6pCiCZiENRPxVP6hxqkqRZ_0WXbB29vYeAo,8187
|
@@ -726,8 +730,8 @@ sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
726
730
|
sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqngBmRWXFmkl5NnuK1GM,13914
|
727
731
|
sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
|
728
732
|
sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
|
729
|
-
sglang-0.4.9.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
730
|
-
sglang-0.4.9.dist-info/METADATA,sha256=
|
731
|
-
sglang-0.4.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
732
|
-
sglang-0.4.9.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
733
|
-
sglang-0.4.9.dist-info/RECORD,,
|
733
|
+
sglang-0.4.9.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
734
|
+
sglang-0.4.9.post1.dist-info/METADATA,sha256=FpRJrvi06EAhGk6RNzufoaKNu-D0u_xFKAzv1pjRmv8,27220
|
735
|
+
sglang-0.4.9.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
736
|
+
sglang-0.4.9.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
737
|
+
sglang-0.4.9.post1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|