sglang 0.4.1.post7__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_offline_throughput.py +17 -11
- sglang/bench_one_batch.py +14 -6
- sglang/bench_serving.py +47 -44
- sglang/lang/chat_template.py +31 -0
- sglang/srt/configs/load_config.py +1 -0
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +5 -2
- sglang/srt/entrypoints/engine.py +5 -2
- sglang/srt/entrypoints/http_server.py +24 -0
- sglang/srt/function_call_parser.py +494 -0
- sglang/srt/layers/activation.py +5 -5
- sglang/srt/layers/dp_attention.py +3 -1
- sglang/srt/layers/layernorm.py +5 -5
- sglang/srt/layers/linear.py +24 -9
- sglang/srt/layers/logits_processor.py +1 -1
- sglang/srt/layers/moe/ep_moe/layer.py +20 -12
- sglang/srt/layers/moe/fused_moe_native.py +17 -3
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +18 -1
- sglang/srt/layers/moe/fused_moe_triton/layer.py +9 -0
- sglang/srt/layers/parameter.py +16 -7
- sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/fp8.py +4 -1
- sglang/srt/layers/rotary_embedding.py +6 -1
- sglang/srt/layers/sampler.py +28 -8
- sglang/srt/layers/torchao_utils.py +12 -6
- sglang/srt/managers/detokenizer_manager.py +1 -0
- sglang/srt/managers/io_struct.py +36 -5
- sglang/srt/managers/schedule_batch.py +31 -25
- sglang/srt/managers/scheduler.py +61 -35
- sglang/srt/managers/tokenizer_manager.py +4 -0
- sglang/srt/model_executor/cuda_graph_runner.py +23 -25
- sglang/srt/model_executor/forward_batch_info.py +5 -7
- sglang/srt/model_executor/model_runner.py +7 -4
- sglang/srt/model_loader/loader.py +75 -0
- sglang/srt/model_loader/weight_utils.py +91 -5
- sglang/srt/models/commandr.py +14 -2
- sglang/srt/models/dbrx.py +9 -1
- sglang/srt/models/deepseek_v2.py +3 -3
- sglang/srt/models/gemma2.py +9 -1
- sglang/srt/models/grok.py +1 -0
- sglang/srt/models/minicpm3.py +3 -3
- sglang/srt/models/torch_native_llama.py +17 -4
- sglang/srt/openai_api/adapter.py +139 -37
- sglang/srt/openai_api/protocol.py +5 -4
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +11 -14
- sglang/srt/sampling/sampling_batch_info.py +4 -14
- sglang/srt/server.py +2 -2
- sglang/srt/server_args.py +20 -1
- sglang/srt/speculative/eagle_utils.py +37 -15
- sglang/srt/speculative/eagle_worker.py +11 -13
- sglang/srt/utils.py +62 -65
- sglang/test/test_programs.py +1 -0
- sglang/test/test_utils.py +81 -22
- sglang/version.py +1 -1
- {sglang-0.4.1.post7.dist-info → sglang-0.4.2.dist-info}/METADATA +7 -7
- {sglang-0.4.1.post7.dist-info → sglang-0.4.2.dist-info}/RECORD +67 -56
- {sglang-0.4.1.post7.dist-info → sglang-0.4.2.dist-info}/LICENSE +0 -0
- {sglang-0.4.1.post7.dist-info → sglang-0.4.2.dist-info}/WHEEL +0 -0
- {sglang-0.4.1.post7.dist-info → sglang-0.4.2.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,18 @@
|
|
1
1
|
sglang/__init__.py,sha256=njc4c2IBYklSqVMiT70GL630Uddg5D_IU_6dthApPxc,1587
|
2
2
|
sglang/api.py,sha256=PuJTtrKJ50ddFNOuT22ChCSd7xJISkbi3pnGcbDJ9QQ,6882
|
3
3
|
sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
|
4
|
-
sglang/bench_offline_throughput.py,sha256=
|
5
|
-
sglang/bench_one_batch.py,sha256=
|
4
|
+
sglang/bench_offline_throughput.py,sha256=vIoF87HIpezB1x-xWzUl6SdXi88Fza8g4hDU7Gwecf4,13123
|
5
|
+
sglang/bench_one_batch.py,sha256=d-LuRHEyDZjh180OCN5fqTjr8Zusk3zc0vhoJ33x0B0,17905
|
6
6
|
sglang/bench_one_batch_server.py,sha256=iu73SsvYwnuRktYZDz1P6psMiRx8MbEbF5sbsYJdzYg,5962
|
7
|
-
sglang/bench_serving.py,sha256=
|
7
|
+
sglang/bench_serving.py,sha256=jYU3rYIDkzpYhjSpJw_IkEs_UNQfouNW4phs3z5TObc,54303
|
8
8
|
sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
|
9
9
|
sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
|
10
10
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
11
11
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
12
12
|
sglang/utils.py,sha256=wvLVVC8U2qIhCSCrtzvV3wXapvJweir1XDNdpfoPFRM,11934
|
13
|
-
sglang/version.py,sha256=
|
13
|
+
sglang/version.py,sha256=6hfVa12Q-nXyUEXr6SyKpqPEDJW6vlRHyPxlA27PfTs,22
|
14
14
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
sglang/lang/chat_template.py,sha256=
|
15
|
+
sglang/lang/chat_template.py,sha256=v4SyYViPHX3i3XT46F7vlARn4UaSiP3PBpTGtzO6uRY,17006
|
16
16
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
17
17
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
18
18
|
sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
|
@@ -28,19 +28,20 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
|
|
28
28
|
sglang/srt/_custom_ops.py,sha256=7jL5BTcoS8PmR56y2Qsa3q8emI-tmrJuV4hLTwLVFBE,5040
|
29
29
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
30
30
|
sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
|
31
|
+
sglang/srt/function_call_parser.py,sha256=HMqCCd-YQeyADV_gTCduF9gmw2k3bRAkoJYcFsK3w3c,19230
|
31
32
|
sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mvg7hxHY,7729
|
32
33
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
33
34
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
34
|
-
sglang/srt/server.py,sha256=
|
35
|
-
sglang/srt/server_args.py,sha256=
|
35
|
+
sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
|
36
|
+
sglang/srt/server_args.py,sha256=GN9NAKDkSWpMQWoTBzHyvp-UimfKKpwAmzgiwUJTe4A,39792
|
36
37
|
sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
|
37
|
-
sglang/srt/utils.py,sha256=
|
38
|
+
sglang/srt/utils.py,sha256=jzHsVQDYF25Z7rPjiaO4w5iz7ZskRRZxTvEiUeFcSJw,46380
|
38
39
|
sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
|
39
40
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
40
41
|
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
41
42
|
sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
|
42
43
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
43
|
-
sglang/srt/configs/load_config.py,sha256=
|
44
|
+
sglang/srt/configs/load_config.py,sha256=la2ezNRcUZs7qiTYta2KEXqZ0U4TcmWW3U0sjoHgQQ0,3107
|
44
45
|
sglang/srt/configs/model_config.py,sha256=sQIOfslBRzhOjucZdd8zE8nO9PEOc7zc6cZMbguQgoY,16876
|
45
46
|
sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
|
46
47
|
sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
|
@@ -52,27 +53,27 @@ sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUV
|
|
52
53
|
sglang/srt/distributed/parallel_state.py,sha256=rTqUtbm6eNNYzlMP8NQC55E842Agtf-g3cGPzqlfbh8,47527
|
53
54
|
sglang/srt/distributed/utils.py,sha256=U-BSaXYjWwnfG8g-tUfBhjKt5Ug097nyHtu3g3aea_Y,8473
|
54
55
|
sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
|
55
|
-
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=
|
56
|
+
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=KiAvIn1ar9CwyPE5oPMHqKkMbpxBAQYHonegifUcleA,15841
|
56
57
|
sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=q2q1A_Sqvrvkrgf7Tjg5XhXR1JWzzUUPHSicAKK2SjE,11022
|
57
58
|
sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=gPjEH1-izoby5uDrfUlzNf21luPT0Ow7pJjhCRKnHy8,1728
|
58
59
|
sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--ZqapHtvm70Lgl7obtE6ZfgeAiU,10064
|
59
60
|
sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
|
60
61
|
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
|
61
62
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
62
|
-
sglang/srt/entrypoints/engine.py,sha256=
|
63
|
-
sglang/srt/entrypoints/http_server.py,sha256=
|
64
|
-
sglang/srt/layers/activation.py,sha256=
|
63
|
+
sglang/srt/entrypoints/engine.py,sha256=2qTOwQV1o1lWb1RkeQiie1HfA6rQW3szeqbB92r6J9Q,16840
|
64
|
+
sglang/srt/entrypoints/http_server.py,sha256=TJlekPuw01_AvfAhDUdD-DaxCmmW_uH_rWL2CNv2OGE,19545
|
65
|
+
sglang/srt/layers/activation.py,sha256=DLu5et46aCzXKYblCu5WxDMhSY3Uv-I2id3pGBk6u34,5448
|
65
66
|
sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
|
66
|
-
sglang/srt/layers/dp_attention.py,sha256=
|
67
|
-
sglang/srt/layers/layernorm.py,sha256=
|
68
|
-
sglang/srt/layers/linear.py,sha256=
|
69
|
-
sglang/srt/layers/logits_processor.py,sha256=
|
70
|
-
sglang/srt/layers/parameter.py,sha256=
|
67
|
+
sglang/srt/layers/dp_attention.py,sha256=LLUMHIdphhQy1rNR52uwIFl85oDFPAsogMwYF3d83PU,1910
|
68
|
+
sglang/srt/layers/layernorm.py,sha256=sn73s-OhXyxcSmjRUPi0Po6EQ10ZpDee9Bpn6YasnZo,4014
|
69
|
+
sglang/srt/layers/linear.py,sha256=u9ND-ZRv_uq8I3bxqbN11bus-kr5DHqdo67ksFA0EcU,50724
|
70
|
+
sglang/srt/layers/logits_processor.py,sha256=_3TZNUbvjmw63ywBv6V6WU87G1TErMaXGa7-VNExM1E,12190
|
71
|
+
sglang/srt/layers/parameter.py,sha256=sX6aB69qbD6jRqQeOfXqK_ueyyZlXCeC0AlglbsRPcM,14901
|
71
72
|
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
72
73
|
sglang/srt/layers/radix_attention.py,sha256=tPjJA3P9kuFBk2QWFTgOI8UbVUFLVDZgFaQWuokx894,2234
|
73
|
-
sglang/srt/layers/rotary_embedding.py,sha256=
|
74
|
-
sglang/srt/layers/sampler.py,sha256=
|
75
|
-
sglang/srt/layers/torchao_utils.py,sha256=
|
74
|
+
sglang/srt/layers/rotary_embedding.py,sha256=CdnkPxUtef-o29i1G2p1an3H7sEgScWLvI_XVoMaPbo,43444
|
75
|
+
sglang/srt/layers/sampler.py,sha256=T_Lvjc7PhmOUhNAeSoI14DG9EO4XFve0z3wgWYy7YLU,9769
|
76
|
+
sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
|
76
77
|
sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
|
77
78
|
sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
|
78
79
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jHKHZhhgHuV0Fla_BPzzoo9JfT4,9231
|
@@ -84,14 +85,14 @@ sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=ltWcZ00ugpglSY
|
|
84
85
|
sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
|
85
86
|
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
|
86
87
|
sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
|
87
|
-
sglang/srt/layers/moe/fused_moe_native.py,sha256=
|
88
|
+
sglang/srt/layers/moe/fused_moe_native.py,sha256=OEWpM93X5tJG4-rwz5qmdpTzEUR73zun29YRV3bZglY,4269
|
88
89
|
sglang/srt/layers/moe/topk.py,sha256=qcWDUVvEV6TIO_idymStylkpPp6dMk-wbYj2Zq4ZYJ0,7057
|
89
90
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
90
91
|
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
|
91
|
-
sglang/srt/layers/moe/ep_moe/layer.py,sha256=
|
92
|
+
sglang/srt/layers/moe/ep_moe/layer.py,sha256=Z7Mq6QWC4a9fcnbFU6EbAZiQ9UVoK2SAqIZSkzCrTDY,22725
|
92
93
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
|
93
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
94
|
-
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=
|
94
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=HbMq88qli-4nxXZc_E6g9zyo2quLuJ5Yh84SUVuCPVc,37343
|
95
|
+
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=bofJhiDnRNqD2D20QV7CPNf2SkEj70LqTYsGFO9683U,22816
|
95
96
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
96
97
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
|
97
98
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
|
@@ -123,6 +124,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=feYhe2V6mu6J7bqG3AYr3fdFr
|
|
123
124
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=kklgf2qLI5CQYiJJ5e9Gxx2gAfGxcyMDYpdJnIXPV8E,2748
|
124
125
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
|
125
126
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
|
127
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yN6aItfVswTn6ggd1NJb7MmO9evCet5cRIiKeHcb2r8,3733
|
126
128
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
|
127
129
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
|
128
130
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=uv-RwTNZT2n264dLo4eWxUpB3g7QqUyf2MFEGiRvoqQ,3251
|
@@ -185,13 +187,14 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=feYhe2V6mu6J7bqG3AYr3fdFr
|
|
185
187
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
|
186
188
|
sglang/srt/layers/quantization/__init__.py,sha256=_Sba1KQnmZNKGDKM1MfBs2T3uDqOHfeW6IHO2mTUvfs,4471
|
187
189
|
sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
|
188
|
-
sglang/srt/layers/quantization/fp8.py,sha256=
|
190
|
+
sglang/srt/layers/quantization/fp8.py,sha256=XjLGHFPgX0NBuXa8eOglZ8TPMvXMNges0l4gDdcumRE,34866
|
189
191
|
sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
|
190
192
|
sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
|
191
193
|
sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
|
192
194
|
sglang/srt/layers/quantization/modelopt_quant.py,sha256=_VdVz77dTP-IczPeFrdH6Ttro2D26BZvMlZkCKWj_5o,6200
|
193
195
|
sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
|
194
196
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
197
|
+
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
|
195
198
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
196
199
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
197
200
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
|
@@ -201,6 +204,7 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
201
204
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
|
202
205
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
|
203
206
|
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
|
207
|
+
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
204
208
|
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247
|
205
209
|
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244
|
206
210
|
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
|
@@ -208,11 +212,15 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
208
212
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
|
209
213
|
"sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
|
210
214
|
"sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
|
215
|
+
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
|
211
216
|
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254
|
212
217
|
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252
|
218
|
+
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
|
213
219
|
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242
|
214
220
|
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251
|
221
|
+
"sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
|
215
222
|
"sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244
|
223
|
+
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
|
216
224
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
|
217
225
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
|
218
226
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
|
@@ -224,10 +232,13 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
224
232
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
|
225
233
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
|
226
234
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
|
235
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
227
236
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254
|
228
237
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253
|
238
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
|
229
239
|
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
|
230
240
|
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
|
241
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
|
231
242
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
232
243
|
sglang/srt/lora/lora.py,sha256=xS0YCrlEQb_LlU85TllesoMw0Td88voMU6DSZ9w80cs,14845
|
233
244
|
sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
|
@@ -235,14 +246,14 @@ sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rds
|
|
235
246
|
sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4wL9zoG8Xz8,10909
|
236
247
|
sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
|
237
248
|
sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
|
238
|
-
sglang/srt/managers/detokenizer_manager.py,sha256=
|
249
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=A-tZi9VPkrIAVteQItYUY-07V1rWmySFHNcVf8qAdPI,9578
|
239
250
|
sglang/srt/managers/image_processor.py,sha256=dEjEWzrmJyEXhr5sKBw4BEUEjla8CNdkzFGfogPGmFY,19103
|
240
|
-
sglang/srt/managers/io_struct.py,sha256=
|
241
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
251
|
+
sglang/srt/managers/io_struct.py,sha256=1Z6MCVI1LN2lS_7e8WHkpVNT_LW62mE-jpZ2Jn_FAtE,18267
|
252
|
+
sglang/srt/managers/schedule_batch.py,sha256=oP6ygJUOmo6PuXcA_wecRvOOa_WdpwmIyCPSgJy4qAc,48743
|
242
253
|
sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
|
243
|
-
sglang/srt/managers/scheduler.py,sha256=
|
254
|
+
sglang/srt/managers/scheduler.py,sha256=QGGSau-ydbRzIFdCvE63Na-tpYKHJj_QL1d9raogvXc,70019
|
244
255
|
sglang/srt/managers/session_controller.py,sha256=WXRbtninVEVM0rQYiXFzOwsDph0TNj1L2sRCWQF0dSg,5571
|
245
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
256
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=TjhX0IeFCmk31PDmtVV7Ilc8rqI361XUf_p2KO3ai7s,38669
|
246
257
|
sglang/srt/managers/tp_worker.py,sha256=OiHpFR9Hy1GpgLEkTDsykBiFuv1VKmkjQS58gQVPQIs,8126
|
247
258
|
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7p6zREndc4a9fmYfqW4iY9IYANxdoAioaf0hU92-8Ow,8893
|
248
259
|
sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
|
@@ -253,27 +264,27 @@ sglang/srt/mem_cache/memory_pool.py,sha256=9ud97u1cXnN6O0qlR8tv8woN_20gqisTV6aBg
|
|
253
264
|
sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
|
254
265
|
sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
|
255
266
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
256
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
257
|
-
sglang/srt/model_executor/forward_batch_info.py,sha256=
|
258
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
267
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=3_s7zmLn9d0pVCxZd43bYtPmgkbe1kcRaNZbryMMjPU,18520
|
268
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=IkeGLqxKnHDWtIz6RXx3g_sj8ttr_RXe46o2rO-geF8,14987
|
269
|
+
sglang/srt/model_executor/model_runner.py,sha256=sLP20xoMM_3eLg7gzaKWGKmQGnHrvnmXGvNqTgEAtjk,33043
|
259
270
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
260
|
-
sglang/srt/model_loader/loader.py,sha256=
|
271
|
+
sglang/srt/model_loader/loader.py,sha256=2d9fJNxC3Y6YWmQX4nVOB-b9Glc43ztlkJYJFX1_kxk,46811
|
261
272
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
262
|
-
sglang/srt/model_loader/weight_utils.py,sha256=
|
273
|
+
sglang/srt/model_loader/weight_utils.py,sha256=DMhIVK9tQsLOaUwsNmT61x68V7UswErmO4DKhzR73m0,29642
|
263
274
|
sglang/srt/models/baichuan.py,sha256=I7o7DPny8sWG7dFtr0GTovXFwvncEUleaCVcNNiYnko,14907
|
264
275
|
sglang/srt/models/chatglm.py,sha256=n8uZpx9iHw6V-XCns9mtTf99Iqh35ZjPC5bFDYtkoes,12897
|
265
|
-
sglang/srt/models/commandr.py,sha256=
|
266
|
-
sglang/srt/models/dbrx.py,sha256
|
276
|
+
sglang/srt/models/commandr.py,sha256=y8DFUW0NKbkoY2DP6nhgJ1f7F_ysjaHEkEnZYZW2zdk,14523
|
277
|
+
sglang/srt/models/dbrx.py,sha256=-L9QkUr_xuMuI6mn0AzG_VE1MqRXoaaFtD4r8UuAzkY,14789
|
267
278
|
sglang/srt/models/deepseek.py,sha256=KfcQ54BqlS73XQmtcG0sfnmm3VXOGwUIkd34WS6Gp0Y,15694
|
268
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
279
|
+
sglang/srt/models/deepseek_v2.py,sha256=1JfWeo0q0N7q9nRuz3pfM4QWTTGu3e1qk2957X1x328,38499
|
269
280
|
sglang/srt/models/exaone.py,sha256=Wvr6XofnH2feJ-TzAm5aD1YTyfcum6JdnKMG1S7Xy4g,13035
|
270
281
|
sglang/srt/models/gemma.py,sha256=4Jvt9F-BNhPFiBi5H8aPqcYqKeJLI9KZKy2WpR96RpM,12123
|
271
|
-
sglang/srt/models/gemma2.py,sha256=
|
282
|
+
sglang/srt/models/gemma2.py,sha256=cyQfby-kp2OZPsUACmBh3-jsXkYwQg9Tj6xqtZ7mTwM,15947
|
272
283
|
sglang/srt/models/gemma2_reward.py,sha256=nJ01KfqLSJtqMLm3sG8p2mGZFK1xhhjh7I7Ccb-_Hq8,2494
|
273
284
|
sglang/srt/models/gpt2.py,sha256=chg-5MfAl_gRYXMVrnKkWxY6zA09LEEEIdnWnnZn7N4,9367
|
274
285
|
sglang/srt/models/gpt_bigcode.py,sha256=DH8p76sPPhsxmNz2Dh5Vi5xQmLP_gEZDilIOTlHsEuw,9534
|
275
286
|
sglang/srt/models/granite.py,sha256=3HqQXJlfoKd11w1NCpTYmiPO9HlkA1jJqoAmuTzHuU0,20406
|
276
|
-
sglang/srt/models/grok.py,sha256=
|
287
|
+
sglang/srt/models/grok.py,sha256=NXC0I5_wXmlQ0-gMWgiT-X9ebzOsrTJGcltAXkY6064,18030
|
277
288
|
sglang/srt/models/internlm2.py,sha256=INGGwSCYKoZRAokXJC78RKKde2fgHn9P4JG-N37Pfn0,12124
|
278
289
|
sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
|
279
290
|
sglang/srt/models/llama.py,sha256=YKtLpxgk_OmPRpBJSr1BCTWC6IILFzZtakKnWYYHKw0,22040
|
@@ -284,7 +295,7 @@ sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L
|
|
284
295
|
sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26373
|
285
296
|
sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
|
286
297
|
sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
|
287
|
-
sglang/srt/models/minicpm3.py,sha256=
|
298
|
+
sglang/srt/models/minicpm3.py,sha256=DZ7LltHsyDq8iE7nMi5C9gLzYcQrAIZYkRmx6lCuAgo,24683
|
288
299
|
sglang/srt/models/minicpmv.py,sha256=lgWqj1bWMDvPHPE5POVEjhnY-_qMSidkbsBLMYBtDgM,43181
|
289
300
|
sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
|
290
301
|
sglang/srt/models/mixtral.py,sha256=ybArp6vx7VTrjQ3kqH1FHJ1gQzsFPI5vv1C-Pnix6ws,14520
|
@@ -301,24 +312,24 @@ sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhk
|
|
301
312
|
sglang/srt/models/qwen2_vl.py,sha256=r0OmFH8OcsIZ96fKqXaAWGLUe6oTVW_w6Gt5PChYUXE,23139
|
302
313
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
303
314
|
sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
|
304
|
-
sglang/srt/models/torch_native_llama.py,sha256=
|
315
|
+
sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
|
305
316
|
sglang/srt/models/xverse.py,sha256=sYSSbwB_VC6uGzxkzNHluaJzvSfQXCxQG_OsrIWLWvU,13549
|
306
317
|
sglang/srt/models/xverse_moe.py,sha256=vN486GkRHvgyRgSW2e_zTOQHDkWx86lthahtKxl6M10,15511
|
307
318
|
sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
|
308
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
309
|
-
sglang/srt/openai_api/protocol.py,sha256=
|
319
|
+
sglang/srt/openai_api/adapter.py,sha256=yQaAiZ43w3OIsNj8nC6BEjt9DPE5IZ-ovLAd0r7LbcY,61716
|
320
|
+
sglang/srt/openai_api/protocol.py,sha256=UInFUKQqS8KWLrCzA6s5_uaNC6xAUAAJ4WepQzQ7xpo,11845
|
310
321
|
sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
|
311
|
-
sglang/srt/sampling/sampling_batch_info.py,sha256=
|
322
|
+
sglang/srt/sampling/sampling_batch_info.py,sha256=Ry1N79T9QQY_HJ8GjM50_W4tzKFxMtTfV4GccT7NQ0w,15129
|
312
323
|
sglang/srt/sampling/sampling_params.py,sha256=NCw0zLAnu8u_vQ8QD1RbEw1F9gc7sDWnBCrXn2DEp6o,5788
|
313
324
|
sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
|
314
325
|
sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
|
315
326
|
sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
|
316
327
|
sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
|
317
328
|
sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
|
318
|
-
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=
|
329
|
+
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=l1DyU8kC8n_F4Z6Jd8mZKfF23buuLZ5dWuVfyqDWkUI,2968
|
319
330
|
sglang/srt/speculative/build_eagle_tree.py,sha256=SIKuOFUOIzMLyanL5vViPmFBEiUHm_ezwiGuIyLmauE,9886
|
320
|
-
sglang/srt/speculative/eagle_utils.py,sha256=
|
321
|
-
sglang/srt/speculative/eagle_worker.py,sha256=
|
331
|
+
sglang/srt/speculative/eagle_utils.py,sha256=4SjIfjL0dSnVFR9YMp75W8vMtMBJVvp4-SVtK1v9oM8,24262
|
332
|
+
sglang/srt/speculative/eagle_worker.py,sha256=KMBMjR8SFFRLYNCokKV4liBZrFP_edAEFbTLgyCoTA0,7754
|
322
333
|
sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
|
323
334
|
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
324
335
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
@@ -333,11 +344,11 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
|
|
333
344
|
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
334
345
|
sglang/test/test_block_fp8.py,sha256=rhrIun8aW5zq2qvuGRlo7F7aZ_upjVxtQMVlyc2Th_E,11771
|
335
346
|
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
336
|
-
sglang/test/test_programs.py,sha256=
|
337
|
-
sglang/test/test_utils.py,sha256=
|
347
|
+
sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
|
348
|
+
sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
|
338
349
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
339
|
-
sglang-0.4.
|
340
|
-
sglang-0.4.
|
341
|
-
sglang-0.4.
|
342
|
-
sglang-0.4.
|
343
|
-
sglang-0.4.
|
350
|
+
sglang-0.4.2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
351
|
+
sglang-0.4.2.dist-info/METADATA,sha256=tSq4P2Rhddw25uHR313Islv7DCbaPFSUrZzD8C8_pas,23224
|
352
|
+
sglang-0.4.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
353
|
+
sglang-0.4.2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
354
|
+
sglang-0.4.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|