sglang 0.4.4.post2__py3-none-any.whl → 0.4.4.post3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_serving.py +23 -3
- sglang/srt/configs/deepseekvl2.py +10 -1
- sglang/srt/configs/model_config.py +5 -16
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +1 -1
- sglang/srt/distributed/parallel_state.py +32 -5
- sglang/srt/entrypoints/http_server.py +7 -1
- sglang/srt/entrypoints/verl_engine.py +2 -0
- sglang/srt/function_call_parser.py +0 -1
- sglang/srt/layers/attention/flashattention_backend.py +218 -79
- sglang/srt/layers/dp_attention.py +12 -1
- sglang/srt/layers/moe/topk.py +30 -3
- sglang/srt/layers/quantization/__init__.py +134 -165
- sglang/srt/layers/quantization/awq.py +200 -0
- sglang/srt/layers/quantization/fp8_kernel.py +2 -1
- sglang/srt/layers/quantization/gptq.py +30 -40
- sglang/srt/layers/quantization/w8a8_fp8.py +1 -1
- sglang/srt/layers/rotary_embedding.py +12 -0
- sglang/srt/lora/backend/base_backend.py +4 -4
- sglang/srt/lora/backend/flashinfer_backend.py +12 -9
- sglang/srt/lora/backend/triton_backend.py +5 -8
- sglang/srt/lora/layers.py +19 -33
- sglang/srt/lora/lora_manager.py +20 -7
- sglang/srt/lora/mem_pool.py +12 -6
- sglang/srt/lora/triton_ops/gate_up_lora_b.py +10 -4
- sglang/srt/lora/triton_ops/qkv_lora_b.py +8 -3
- sglang/srt/lora/triton_ops/sgemm_lora_a.py +16 -5
- sglang/srt/lora/triton_ops/sgemm_lora_b.py +11 -6
- sglang/srt/lora/utils.py +6 -0
- sglang/srt/managers/io_struct.py +4 -2
- sglang/srt/managers/multimodal_processors/clip.py +63 -0
- sglang/srt/managers/schedule_batch.py +1 -0
- sglang/srt/managers/scheduler.py +25 -19
- sglang/srt/managers/tokenizer_manager.py +0 -1
- sglang/srt/managers/tp_worker.py +3 -0
- sglang/srt/model_executor/cuda_graph_runner.py +9 -8
- sglang/srt/model_executor/model_runner.py +9 -6
- sglang/srt/model_loader/loader.py +11 -1
- sglang/srt/model_loader/weight_utils.py +6 -3
- sglang/srt/models/clip.py +563 -0
- sglang/srt/models/deepseek_janus_pro.py +2 -2
- sglang/srt/models/deepseek_v2.py +151 -26
- sglang/srt/models/gemma3_causal.py +12 -2
- sglang/srt/models/gemma3_mm.py +6 -0
- sglang/srt/openai_api/adapter.py +88 -87
- sglang/srt/openai_api/protocol.py +10 -5
- sglang/srt/patch_torch.py +71 -0
- sglang/srt/server_args.py +21 -11
- sglang/srt/speculative/eagle_worker.py +1 -1
- sglang/srt/utils.py +33 -0
- sglang/test/runners.py +27 -2
- sglang/test/test_utils.py +1 -1
- sglang/version.py +1 -1
- {sglang-0.4.4.post2.dist-info → sglang-0.4.4.post3.dist-info}/METADATA +8 -4
- {sglang-0.4.4.post2.dist-info → sglang-0.4.4.post3.dist-info}/RECORD +57 -53
- {sglang-0.4.4.post2.dist-info → sglang-0.4.4.post3.dist-info}/WHEEL +0 -0
- {sglang-0.4.4.post2.dist-info → sglang-0.4.4.post3.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.4.post2.dist-info → sglang-0.4.4.post3.dist-info}/top_level.txt +0 -0
@@ -3,13 +3,13 @@ sglang/api.py,sha256=vHiKBg8wwIdmrpnGclop5BzJ-1Q88emrlrfLwNCHg98,7010
|
|
3
3
|
sglang/bench_offline_throughput.py,sha256=OQb-AjL4UNymmir02ht43uzgaNsnO_I11nXSowKMqBI,13841
|
4
4
|
sglang/bench_one_batch.py,sha256=Fp6HBBJHrw672Q1gnklJ7dYboYYjR92D2fNCvbrM3M0,17935
|
5
5
|
sglang/bench_one_batch_server.py,sha256=8VYNhaQbWGP8TkNVuy_sPjD5FiuVZHamtGRWKwa-Z-Q,5962
|
6
|
-
sglang/bench_serving.py,sha256=
|
6
|
+
sglang/bench_serving.py,sha256=9w5bzcQlm9bo7vWTq3XUJIaKvZy5k2vl4CNjgGq2LV8,56095
|
7
7
|
sglang/check_env.py,sha256=76itNLUw9KlqbiY1BI4u4YaMZaqyCNcrCLUIb6aHflM,8396
|
8
8
|
sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
|
9
9
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
10
10
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
11
11
|
sglang/utils.py,sha256=Ns1-swsHRb8jYA0wWCQjCR_CDBWIJgyoooEng4fsqQc,16155
|
12
|
-
sglang/version.py,sha256=
|
12
|
+
sglang/version.py,sha256=Lhj8wFyI_a4P5gAvUzUTThFvAemjjMQEp_O1QuZa64c,28
|
13
13
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
sglang/lang/chat_template.py,sha256=xZ-kQpgb4-NY6QOqgRjOODZW5G4EvJnplaqYF6Ng2Ow,18952
|
15
15
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
@@ -29,25 +29,26 @@ sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
|
29
29
|
sglang/srt/code_completion_parser.py,sha256=HhEUzdL-FVBsOot9tKDKA1l8Gdx8qsF1RRg-zHNpmLQ,5400
|
30
30
|
sglang/srt/conversation.py,sha256=mzNPZX7ndgmm1E8azuK8eytN6bNCEu3WbcPReRFfhd0,27819
|
31
31
|
sglang/srt/custom_op.py,sha256=vhdofFbWtpdtaA4NG4tkanWwEsfvBnjh6OPKOxmxXdU,3426
|
32
|
-
sglang/srt/function_call_parser.py,sha256=
|
32
|
+
sglang/srt/function_call_parser.py,sha256=buYENeNEP5bhsvD424yGCa9wOqSfVOZSRn6zLiSJp5I,23733
|
33
33
|
sglang/srt/hf_transformers_utils.py,sha256=_QYTl9LpU0jmKPlYooHi1etwMvb5v40JIrG_t_Fx06w,9215
|
34
34
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
35
35
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
36
|
+
sglang/srt/patch_torch.py,sha256=Fw_QGqSsAdyCumi0dT2HyPlppf9xd3-tQPvwuBZfhxU,2625
|
36
37
|
sglang/srt/reasoning_parser.py,sha256=45xsU9RCPfyG4_Zx4y3-JPyNgAtrqwKI4j5R2NT4g1s,5594
|
37
38
|
sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
|
38
|
-
sglang/srt/server_args.py,sha256=
|
39
|
+
sglang/srt/server_args.py,sha256=SEZjYkfjwv0V1xQafh7Zex7NS5MVenfKWytcd4kZ3t8,49024
|
39
40
|
sglang/srt/torch_memory_saver_adapter.py,sha256=KG3wM9-xZsSdsmORofArnNR7hH55GEyFxaderCDcK9w,1853
|
40
|
-
sglang/srt/utils.py,sha256=
|
41
|
+
sglang/srt/utils.py,sha256=qoo6k_-uxthkMhotF9ba_-njOV-v1AbEcJ9XdQwcha8,56116
|
41
42
|
sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
|
42
43
|
sglang/srt/configs/__init__.py,sha256=vulncVn70WqIT6s0HaB8p_Q6FjOiaLwNZWpoJS9FIuQ,399
|
43
44
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
44
45
|
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
45
|
-
sglang/srt/configs/deepseekvl2.py,sha256=
|
46
|
+
sglang/srt/configs/deepseekvl2.py,sha256=qeI7TO4f2DHIxCRiuRxpnP17K5lCMSNGJjEcq2yF9Ds,23328
|
46
47
|
sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
|
47
48
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
48
49
|
sglang/srt/configs/janus_pro.py,sha256=-QtJ4ZGZiAJb0AkOEcuCHzIKLw23nF8nRk3rdCcoUO0,19016
|
49
50
|
sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
|
50
|
-
sglang/srt/configs/model_config.py,sha256=
|
51
|
+
sglang/srt/configs/model_config.py,sha256=U8cynUgfijwutxZ-8hhoaWTKR5NqpXGIrGxlhbWSBlY,20556
|
51
52
|
sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
|
52
53
|
sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
|
53
54
|
sglang/srt/connector/base_connector.py,sha256=i6i1TIzsz4NbSEkrdMPq-urb2sN2aLAx8dazga4gB9U,2833
|
@@ -69,10 +70,10 @@ sglang/srt/disaggregation/prefill.py,sha256=zw8hDy6Txq_MpC5j0fndLNcKoypT2BhxTkqq
|
|
69
70
|
sglang/srt/disaggregation/utils.py,sha256=ebOZ3lSFVkbNtl6uUfS6sYYYVBjgmWdQLOsqIZBGgN0,1088
|
70
71
|
sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
|
71
72
|
sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
|
72
|
-
sglang/srt/distributed/parallel_state.py,sha256=
|
73
|
+
sglang/srt/distributed/parallel_state.py,sha256=hoTgLYfHIKMb_tSwBTauuusJZ8oY9BsiubTTOF8UfIw,50713
|
73
74
|
sglang/srt/distributed/utils.py,sha256=U-BSaXYjWwnfG8g-tUfBhjKt5Ug097nyHtu3g3aea_Y,8473
|
74
75
|
sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
|
75
|
-
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=
|
76
|
+
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=iLCrFQ3dyH_xZL3vI-paTpxeP7Rt4DszRIk8qdbtfvA,22214
|
76
77
|
sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=q2q1A_Sqvrvkrgf7Tjg5XhXR1JWzzUUPHSicAKK2SjE,11022
|
77
78
|
sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=gPjEH1-izoby5uDrfUlzNf21luPT0Ow7pJjhCRKnHy8,1728
|
78
79
|
sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--ZqapHtvm70Lgl7obtE6ZfgeAiU,10064
|
@@ -80,10 +81,10 @@ sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6
|
|
80
81
|
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
|
81
82
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
82
83
|
sglang/srt/entrypoints/engine.py,sha256=b4JmlvK4M2cWfcbc5NOlR7HjkNzDENlzL4rbSaPjBdU,21295
|
83
|
-
sglang/srt/entrypoints/http_server.py,sha256=
|
84
|
-
sglang/srt/entrypoints/verl_engine.py,sha256=
|
84
|
+
sglang/srt/entrypoints/http_server.py,sha256=VM-gVwW-Ef_SikxoVSE06yydlLo6TGJykjKgffnGoXY,27104
|
85
|
+
sglang/srt/entrypoints/verl_engine.py,sha256=PypBCkUJYy7iP3mKB-W0KYkjZzs4Rq6DqxNgG_nBZaM,5903
|
85
86
|
sglang/srt/layers/activation.py,sha256=1ykXZO0BGz7DFVE-EK26b02I5AgH2IuU4PQB6oUcF4M,6003
|
86
|
-
sglang/srt/layers/dp_attention.py,sha256=
|
87
|
+
sglang/srt/layers/dp_attention.py,sha256=fC1kaYkHGoFjZ2KHTzPFW0e93El-XLRt7ZidkXYIVhE,7595
|
87
88
|
sglang/srt/layers/elementwise.py,sha256=y2mQqjbF2FmFtNYBk5ecTyaj3ELoZyz-rWPY8rrxCtk,13765
|
88
89
|
sglang/srt/layers/layernorm.py,sha256=189bORMggKhYcEYEjl6JRcuIoUPllHo3SheoH6YiORY,4546
|
89
90
|
sglang/srt/layers/linear.py,sha256=HYIGxpRYL6x-jNOkyNtGAw5Ak9Nq8jkntddgTBER_1w,51486
|
@@ -91,13 +92,13 @@ sglang/srt/layers/logits_processor.py,sha256=Vp8ibljVEezTr54xzeOcjiJR7JdYO8ItkO5
|
|
91
92
|
sglang/srt/layers/parameter.py,sha256=0OTMtmsNds42e3z3wHTRJiUfxCWFwSL6DHrqgeTgGt8,15151
|
92
93
|
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
93
94
|
sglang/srt/layers/radix_attention.py,sha256=UDL0y4Zasay_Rk-_XmIU4kaGbaF26ONvEHX5EQzLrqI,2260
|
94
|
-
sglang/srt/layers/rotary_embedding.py,sha256=
|
95
|
+
sglang/srt/layers/rotary_embedding.py,sha256=W4w6Rn-7AgdOqrgVxWY8QJwSpiKUrRbH03bmX6hdw1I,43175
|
95
96
|
sglang/srt/layers/sampler.py,sha256=yipSyN5UWGwGS-BC-WzWMmelys4CCDtK_8b1OpaK6sM,11622
|
96
97
|
sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
|
97
98
|
sglang/srt/layers/vocab_parallel_embedding.py,sha256=QUxd4sELx6p3dHvEKmccPZ-phdd_9EjNdwjH3SJ9zxI,22238
|
98
99
|
sglang/srt/layers/attention/base_attn_backend.py,sha256=X_GIbQuU9njtUEGdUP7E_KRhmGxj3UyPHNESlL3QaQ8,3264
|
99
100
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
|
100
|
-
sglang/srt/layers/attention/flashattention_backend.py,sha256=
|
101
|
+
sglang/srt/layers/attention/flashattention_backend.py,sha256=kiiuklwtEKCgKjQPgvNnl9zjX8IGYqti4j21b-DcRGM,16831
|
101
102
|
sglang/srt/layers/attention/flashinfer_backend.py,sha256=m1rOYGr9uaxeUPA8BGPGGe8yicLJKNmRm4zxkJoQU9k,45980
|
102
103
|
sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=BgbGK5iROulOco7o3vYTaPw1Ei3EaQj6Cn9W57Z4amQ,30360
|
103
104
|
sglang/srt/layers/attention/flashmla_backend.py,sha256=iWzxEEIJ6g3RrovSReqAZMjjcv36qCIsPEHZGs_J8JI,10543
|
@@ -112,7 +113,7 @@ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMP
|
|
112
113
|
sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
|
113
114
|
sglang/srt/layers/moe/fused_moe_native.py,sha256=KF0J5W5biWYWbERUHAduQFnUV5om9VaYv0I9avsVfgU,4330
|
114
115
|
sglang/srt/layers/moe/router.py,sha256=gvyK7hXlujfCZCmAIFc3oxfgjuAjzlpPe3mp1Blc6Y0,10419
|
115
|
-
sglang/srt/layers/moe/topk.py,sha256=
|
116
|
+
sglang/srt/layers/moe/topk.py,sha256=udkEhgzXbNqk0-bSReeLMDtP_3aOtTlZa6qPn82keVs,8436
|
116
117
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
117
118
|
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=E-VyToHN13f443U3UboDgLwv-t8gAcXOtPHkrI2xUpI,18793
|
118
119
|
sglang/srt/layers/moe/ep_moe/layer.py,sha256=ZiS9viPdcpB7SjylNVB3DSuF3Az1jGjDPdCqd657Qf0,36990
|
@@ -239,19 +240,20 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
|
|
239
240
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
|
240
241
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
|
241
242
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
|
242
|
-
sglang/srt/layers/quantization/__init__.py,sha256=
|
243
|
+
sglang/srt/layers/quantization/__init__.py,sha256=ppIxDMFk-XcmDS7SWZe5fc9ymJIu3H3rCHTqm6utKCU,12001
|
244
|
+
sglang/srt/layers/quantization/awq.py,sha256=VImnVCU_QBLFba6S88T0dJ-vLy6SMm3OLIMEdllDfVI,6663
|
243
245
|
sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScLFjB5_sywFlAfE5J0,4776
|
244
246
|
sglang/srt/layers/quantization/blockwise_int8.py,sha256=BS4nmo18QaC3vyCe9Wwe0Te-7FmFUe-udl7xbhRaU0s,14887
|
245
247
|
sglang/srt/layers/quantization/fp8.py,sha256=J9P6SwZ1PIb3dEJoP7X2Uw5VOtU35vU5jugkoYyaZC4,41081
|
246
|
-
sglang/srt/layers/quantization/fp8_kernel.py,sha256=
|
248
|
+
sglang/srt/layers/quantization/fp8_kernel.py,sha256=JRalHJ-btDpzl3oXu2R_ZoJBu5TzBBmW_wKZDFs-usQ,24384
|
247
249
|
sglang/srt/layers/quantization/fp8_utils.py,sha256=OL_tV-NvNGcXV2i3mOXytjzi9d4t59vOxvZN_hBc1KA,21572
|
248
|
-
sglang/srt/layers/quantization/gptq.py,sha256=
|
250
|
+
sglang/srt/layers/quantization/gptq.py,sha256=e4rMz374-yQQqeAI77WPxfcAaRk38GeN2akEpvnC_Do,15141
|
249
251
|
sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRn_imIw8kNgqdtb2lr7BettjgDgimbl1Rubnamjh8,11352
|
250
252
|
sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
|
251
253
|
sglang/srt/layers/quantization/kv_cache.py,sha256=rJi6amyLZsquUMo_V5iLlPMqdsGTLgxh4popN1xUHCQ,4236
|
252
254
|
sglang/srt/layers/quantization/modelopt_quant.py,sha256=mne4uKF0R-K0OvWN7X5ZxD4LdXKBc6GvmpZzIW6gkmM,6969
|
253
255
|
sglang/srt/layers/quantization/utils.py,sha256=BmbovB_SxPcV7DRzGod8plawLbWRLIiN8Q2l_oWtAfw,5627
|
254
|
-
sglang/srt/layers/quantization/w8a8_fp8.py,sha256=
|
256
|
+
sglang/srt/layers/quantization/w8a8_fp8.py,sha256=XcQdgqXA3eKbAf-4_0I81Y5Nvjns3bQTocovnN8141w,6234
|
255
257
|
sglang/srt/layers/quantization/w8a8_int8.py,sha256=V5vxn0wmUL1szj38lsJOKeNNEvFHisU7hZZLO4FfoNc,8733
|
256
258
|
sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
257
259
|
sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=YEtWykakVRjS4rko8T7ui0uf_Q15n9SPcASY9EkWYGk,25344
|
@@ -412,39 +414,40 @@ sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a
|
|
412
414
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
|
413
415
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=FImA-TJ_tQDjqwoNWxS--sRDoKDXf9gamlME3tkxH58,3252
|
414
416
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
415
|
-
sglang/srt/lora/layers.py,sha256=
|
417
|
+
sglang/srt/lora/layers.py,sha256=eqU5PxLx9jsmp0fxQ-e9mlSD0Zz4Y9Uan_x9Z5-y1EQ,11835
|
416
418
|
sglang/srt/lora/lora.py,sha256=9BpasJObx-XL_qSoQhheGaYBJ3PlKqJAARvruIOJE0w,7514
|
417
419
|
sglang/srt/lora/lora_config.py,sha256=qDgMTx_69jyJUl29O5FxLzYa0BMhqYVXWXfyyVOvGm0,1684
|
418
|
-
sglang/srt/lora/lora_manager.py,sha256=
|
419
|
-
sglang/srt/lora/mem_pool.py,sha256=
|
420
|
-
sglang/srt/lora/utils.py,sha256=
|
420
|
+
sglang/srt/lora/lora_manager.py,sha256=Gpkq4N_cJGMIDtxUCScwP4LGcHyUJZ457EI_ti30_A8,9187
|
421
|
+
sglang/srt/lora/mem_pool.py,sha256=xUFoHUDJgX9lt2YugD9HUY5tIMnJiazYMZ6LYqSGv-E,9633
|
422
|
+
sglang/srt/lora/utils.py,sha256=GjEBgsGhDhX4NqVqeaciznQ8RotKZmb2c-nw4YMLHxA,5251
|
421
423
|
sglang/srt/lora/backend/__init__.py,sha256=FziFT8HguMFj-h0tUCc4_UEbtOWMlYi4gNlYJcArWh4,671
|
422
|
-
sglang/srt/lora/backend/base_backend.py,sha256=
|
423
|
-
sglang/srt/lora/backend/flashinfer_backend.py,sha256=
|
424
|
-
sglang/srt/lora/backend/triton_backend.py,sha256=
|
424
|
+
sglang/srt/lora/backend/base_backend.py,sha256=tGpABAn3DVC8GONf8USkaxkzkpVsDYfgKrnLCsXpivo,4558
|
425
|
+
sglang/srt/lora/backend/flashinfer_backend.py,sha256=VmDSY2YqTLK2EBeqdMiNoirPxDifCMmfiCB3HNwpgvE,4138
|
426
|
+
sglang/srt/lora/backend/triton_backend.py,sha256=_QbqggFPHMPWgx3PI15yyyfxBCTxSpGA209x_frI12E,2517
|
425
427
|
sglang/srt/lora/triton_ops/__init__.py,sha256=JGOYPIn1XbGcyJTbt8A0qoc02PYONSGNNjGkC8yJpAM,283
|
426
|
-
sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=
|
427
|
-
sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=
|
428
|
-
sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=
|
429
|
-
sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=
|
428
|
+
sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=CDGt7lpu9GjykgMtmwbZ3PEqjTlRYyh28AUlj1cRcmw,5279
|
429
|
+
sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=HTfU3HxxxVyaG_aJrrVjPJTnqf62yvepcKJKYkG0XJQ,5944
|
430
|
+
sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=ZmWEqHJaorRNNj-c_ZXPi_pX8X_yIAwudRHAJVa0m08,4350
|
431
|
+
sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=Q58UzWUb3QFqY_ZxWA3poN373N0Hwkks5AQRKIuvFC8,4517
|
430
432
|
sglang/srt/managers/cache_controller.py,sha256=BRRyu110lJalLVLZbTCMOWbD4s-LG3f9Xz6sxU2dCrc,18144
|
431
433
|
sglang/srt/managers/configure_logging.py,sha256=fOJaXAQ1n9m-8KPJndpsKvS885i69SMafoEADLIVfIM,1633
|
432
434
|
sglang/srt/managers/data_parallel_controller.py,sha256=Xkj2n9uDyq7a-AVDZlfzeuNkC4ibsSftb1_bed9hgQ4,10318
|
433
435
|
sglang/srt/managers/detokenizer_manager.py,sha256=HTfpJWMF1EImhKOnLJ96xPmYXm71xzaisLMfxg3zpgs,10111
|
434
436
|
sglang/srt/managers/expert_distribution.py,sha256=r3o5RGI0gnV7xb60AApqKYa0oiSB37oB7hQBX7P3xZM,3225
|
435
|
-
sglang/srt/managers/io_struct.py,sha256=
|
437
|
+
sglang/srt/managers/io_struct.py,sha256=_WYQ2x49Wc8WqmZH0Q5Te7zVrGRQkbn0ADghuDwyk7k,23852
|
436
438
|
sglang/srt/managers/mm_utils.py,sha256=KzodrStj3ouIEHLRja3TUml0YUQ59qmEPejks5ikPQk,13828
|
437
439
|
sglang/srt/managers/multimodal_processor.py,sha256=nTlktTXYo2NKr5Ab2AaSjydNQDdwpJHs4XdYr5zdtkU,2154
|
438
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
440
|
+
sglang/srt/managers/schedule_batch.py,sha256=2HOloc18BAM10aL8os34sQ3KM3sYi-yhDDahlTxrBtU,60106
|
439
441
|
sglang/srt/managers/schedule_policy.py,sha256=E1qVq2G3jptKdX9nlqfayeRBUll9xB6bK8nBf3EW32E,19469
|
440
|
-
sglang/srt/managers/scheduler.py,sha256=
|
442
|
+
sglang/srt/managers/scheduler.py,sha256=kH8oC7R_gir2Whsq2NMCbzdmd8nqGx0GXqXuVwr8dTE,80004
|
441
443
|
sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=u2sj6MViFTov0lVZSysZ-wph2pEqRCtCjwA1UdttZ7I,26338
|
442
444
|
sglang/srt/managers/session_controller.py,sha256=o-ifit0n4_xHLNmyD0Ams8FxGRgxFybX-Vz1hwgr3UQ,5755
|
443
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
444
|
-
sglang/srt/managers/tp_worker.py,sha256=
|
445
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=cYzNwHKr5Y5LMwHeU1YIyTIR88-QMpMYqcWt1nm8KLU,46161
|
446
|
+
sglang/srt/managers/tp_worker.py,sha256=IFiOhbNIya-7cqgp_Yg0ZXGcsgy9YS295AfxJYjFqzQ,8833
|
445
447
|
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=3_ZJ8Rq7v2ZDaRNTRu5Dy8AbqiAlJQp3IAKnn_WAwd8,9127
|
446
448
|
sglang/srt/managers/utils.py,sha256=59IuYvuEfi8ji_acKqH3Y1-2PaA_dSlZMbHSCqjQfCA,1629
|
447
449
|
sglang/srt/managers/multimodal_processors/base_processor.py,sha256=PpNwQBhEuXbztzS3cBFu7UL5sfKYay-WoqAweSVRRao,9984
|
450
|
+
sglang/srt/managers/multimodal_processors/clip.py,sha256=cuC7jynTkqRAm_yixey0Tv1KSblI97lCzOuX8-Ix8sg,2148
|
448
451
|
sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py,sha256=-qU99yCs3TYM8g9vOo5_SIWX91theu0ZREXZT5-A9yY,4594
|
449
452
|
sglang/srt/managers/multimodal_processors/gemma3.py,sha256=Ic8wTOS5TS9qc-SsGkKWQ_7NgnimCnnBeXw4UB8DTgM,2745
|
450
453
|
sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=GaVH3_HxGcQHRvFuP5zpkQ9m678cyVrbjb1iwwdV_fE,2780
|
@@ -461,28 +464,29 @@ sglang/srt/mem_cache/paged_allocator.py,sha256=Fl6d8rgkwGIgq3n7AKM7Jmm_aIgu86jJ2
|
|
461
464
|
sglang/srt/mem_cache/radix_cache.py,sha256=Lm-pco6CJ4orb9IfDpbHm5MnyK8Ya0OF1x9p88dv548,14906
|
462
465
|
sglang/srt/metrics/collector.py,sha256=0X40ZZ18182sx2t0eqeqoK7gspH36L343zNvSkgBvd0,9293
|
463
466
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
464
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
467
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=rIxQ4Yvt5-i5oDBxkzEm3OtvWid7yx73jUtMILF6WWQ,23207
|
465
468
|
sglang/srt/model_executor/forward_batch_info.py,sha256=RKRQMoMcAexG-YweQKl-uSXc4qe-yk4u6iTtWK_BOpw,19591
|
466
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
469
|
+
sglang/srt/model_executor/model_runner.py,sha256=TMcejBnQxNSZGst-8B04z9IX6smVxCjTeA6dy2-5U84,44807
|
467
470
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
468
|
-
sglang/srt/model_loader/loader.py,sha256=
|
471
|
+
sglang/srt/model_loader/loader.py,sha256=mK9ZspmhUhw5A0e5G3DF77HJ4-JsDvzLvwj5I2J3Vdg,53883
|
469
472
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
470
|
-
sglang/srt/model_loader/weight_utils.py,sha256=
|
473
|
+
sglang/srt/model_loader/weight_utils.py,sha256=Bkfgz6TUjkJJb8BiFxrv7FgbZFd9eW2y21jDBPdLWSo,32056
|
471
474
|
sglang/srt/models/baichuan.py,sha256=iXgta-W38OWesxmXWZJ73fUvPdu51EwTQzUD5mmfJ8s,15721
|
472
475
|
sglang/srt/models/chatglm.py,sha256=avLC7mjjGskBxCxy-9s0sMlAJjfFoG_y8VieR1QfDsM,13918
|
476
|
+
sglang/srt/models/clip.py,sha256=vNKS-wWs6Pl8hwJUY-3g8L3uax9Z_Voc2CF0C8IEEiQ,19554
|
473
477
|
sglang/srt/models/commandr.py,sha256=Ug-B0QcdWZufrTybC6K5yP3MLKNsYb-vzfrqUsXYUcI,15276
|
474
478
|
sglang/srt/models/dbrx.py,sha256=0Vf4yhqe8YeQuKR3P-agvYJScmHwH3-tFbyU8kv5QJM,15559
|
475
479
|
sglang/srt/models/deepseek.py,sha256=Le2MXij8m4hT7QYgD0bFMFmYhbgX7SMjoXZFB8BxgyA,16871
|
476
|
-
sglang/srt/models/deepseek_janus_pro.py,sha256
|
480
|
+
sglang/srt/models/deepseek_janus_pro.py,sha256=D5nJgMmrIqXW71r9_-_yd63G8DftnM1K8mS4hU8O_pc,70547
|
477
481
|
sglang/srt/models/deepseek_nextn.py,sha256=kca-2Fm2_SmqbOEFfd80pobooi1BXd1oe_4EsUM6SeI,13561
|
478
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
482
|
+
sglang/srt/models/deepseek_v2.py,sha256=7ljI2R9GmngO4gcrV3MLmioxeEKgCFyG-YL_J6QYA8Q,58695
|
479
483
|
sglang/srt/models/deepseek_vl2.py,sha256=T7zdLBnnBSVPCX4cec1B-MbnXru1-70KqEdDLqbYb_g,12956
|
480
484
|
sglang/srt/models/exaone.py,sha256=5iibqQTjpgosuGRt2rj2lWR0ShK2XGhbdFSnOWpaQss,13386
|
481
485
|
sglang/srt/models/gemma.py,sha256=3XxMDOKz4xMP6VzWoW8f0hmMf8LP8fhzMw5prsYC4e8,12602
|
482
486
|
sglang/srt/models/gemma2.py,sha256=MDe_HNkSpEJpw426tbx3fp271GBlSVEuhIdGeOB_jYA,16356
|
483
487
|
sglang/srt/models/gemma2_reward.py,sha256=V8U3_ADUHWPdOwvEe1jhGW-oJmBgL8t1TY3-67Ksv2A,2618
|
484
|
-
sglang/srt/models/gemma3_causal.py,sha256=
|
485
|
-
sglang/srt/models/gemma3_mm.py,sha256=
|
488
|
+
sglang/srt/models/gemma3_causal.py,sha256=nKO-DRtvcXn5bHquxILgnp0fJT6usoB81W8kPVdYsA0,24934
|
489
|
+
sglang/srt/models/gemma3_mm.py,sha256=ZFgHB9-GBNZcMtYvUdynk16mWmKQ-oYxkDHhttWNQB8,17854
|
486
490
|
sglang/srt/models/gpt2.py,sha256=dAnfmsAL7JVHakryqrERR1jgL8mI1Op6nPHYfDCF7Ao,9802
|
487
491
|
sglang/srt/models/gpt_bigcode.py,sha256=EAN6xAXpa8m3DcBuH1D4rTPji2oG9NSozGXSNHtE2lw,10268
|
488
492
|
sglang/srt/models/granite.py,sha256=nu_Zl_PYn188gk1uYVZ76y4wwHZV7G0w7uanhqpSFUs,20813
|
@@ -523,8 +527,8 @@ sglang/srt/models/torch_native_llama.py,sha256=5tfFSMAXB3ScToqTALtCXa8Oo-qPCJh-K
|
|
523
527
|
sglang/srt/models/xverse.py,sha256=I7ivNsk6NRqPxlMUmdclpzDCvhAnWbv_GOj01MKHJrQ,13996
|
524
528
|
sglang/srt/models/xverse_moe.py,sha256=xLwn5pRwQrvj7zMmwl3o49m7xILb2ACRdWvm9hY8LDc,16743
|
525
529
|
sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
|
526
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
527
|
-
sglang/srt/openai_api/protocol.py,sha256=
|
530
|
+
sglang/srt/openai_api/adapter.py,sha256=q0364qVBd-iymVnVBngv4ZMdZorl5XEYN6u8ZZCaDTs,71454
|
531
|
+
sglang/srt/openai_api/protocol.py,sha256=Y8PFFhLbzhpoERM6-WsTkm-ZuGcE-3tfenh9e-AC1vc,13374
|
528
532
|
sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
|
529
533
|
sglang/srt/sampling/sampling_batch_info.py,sha256=wrGGU27mWOi_yCBBCOvTQLBdyTjfkPuj7Hsk0zOFyH8,11989
|
530
534
|
sglang/srt/sampling/sampling_params.py,sha256=nXm44Inn91YtrMpAm5mDb6-97owRy-Bh6lZ0BIpw73I,5919
|
@@ -536,13 +540,13 @@ sglang/srt/sampling/penaltylib/presence_penalty.py,sha256=NRh10AJrrQlGJ6S-enGdRe
|
|
536
540
|
sglang/srt/speculative/build_eagle_tree.py,sha256=SFQ3eHbhfNxOdxgqDP5wSV_ZlIVqLw7VivycNZ963N0,11690
|
537
541
|
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=t2mbGZY23E7WraJJQW_4uaLTfnbPDExRVC7jldtuY6s,9287
|
538
542
|
sglang/srt/speculative/eagle_utils.py,sha256=_T3eRd59bzCR2YE_Z4W6Pux40KpcueWwN-rrLe-ikFQ,25812
|
539
|
-
sglang/srt/speculative/eagle_worker.py,sha256=
|
543
|
+
sglang/srt/speculative/eagle_worker.py,sha256=W6hoEW0tpNaoC0T01vEEMJOwaiZjhH4rtbvd2FOKxiY,24229
|
540
544
|
sglang/srt/speculative/spec_info.py,sha256=rhaKG0TzyF9XZYHEWp1jccwTBohSNsUDvxHFtAoOl18,709
|
541
545
|
sglang/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
542
546
|
sglang/test/few_shot_gsm8k.py,sha256=7VLbWl4nCQs1wjtW4q-46jf9jUCycSs5Iw8v7sUSzBw,4284
|
543
547
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
544
548
|
sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
|
545
|
-
sglang/test/runners.py,sha256=
|
549
|
+
sglang/test/runners.py,sha256=zl_7wdwUbuCSZoA-f94VhnOI36VX_DwCt3cAEzIjm9s,30484
|
546
550
|
sglang/test/send_one.py,sha256=6FhbJ3c8RpXxvFTELRXaF97GpT7zXXsCDYZh1DqG22E,2550
|
547
551
|
sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
|
548
552
|
sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
|
@@ -557,11 +561,11 @@ sglang/test/test_custom_ops.py,sha256=XBTWh3jEsoe9hZ93p3LAUtjEj5l0qNEaZM5Mto6pbJ
|
|
557
561
|
sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
|
558
562
|
sglang/test/test_layernorm.py,sha256=2GMWqqNDuGvSMSsEBF5eDCzwVSYA9E6hGhRo6s4ecKg,3764
|
559
563
|
sglang/test/test_programs.py,sha256=VZ3vXtUDBnXz0M7gFdDH8hXg9Wa0j_qI8CVqjEgRN_E,18877
|
560
|
-
sglang/test/test_utils.py,sha256=
|
564
|
+
sglang/test/test_utils.py,sha256=bjRoX9pp98l9AKr0q9TgK8rVNxqMJ-1MeEZdrhSaMPw,30694
|
561
565
|
sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
562
566
|
sglang/test/attention/test_flashattn_backend.py,sha256=OxS1KsPs19nwZcDtdURj7_liT1cIfEXb6W4FH9KMaaE,10808
|
563
|
-
sglang-0.4.4.
|
564
|
-
sglang-0.4.4.
|
565
|
-
sglang-0.4.4.
|
566
|
-
sglang-0.4.4.
|
567
|
-
sglang-0.4.4.
|
567
|
+
sglang-0.4.4.post3.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
568
|
+
sglang-0.4.4.post3.dist-info/METADATA,sha256=jVtIt2_q-sSQu5V2AzV5Ex2pRWTJHW9PrNe_NuQ6WpM,25024
|
569
|
+
sglang-0.4.4.post3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
570
|
+
sglang-0.4.4.post3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
571
|
+
sglang-0.4.4.post3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|