sglang 0.4.2.post1__py3-none-any.whl → 0.4.2.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/srt/constrained/outlines_backend.py +9 -1
- sglang/srt/custom_op.py +40 -0
- sglang/srt/entrypoints/engine.py +2 -2
- sglang/srt/layers/activation.py +10 -5
- sglang/srt/layers/attention/flashinfer_backend.py +284 -39
- sglang/srt/layers/attention/triton_backend.py +71 -7
- sglang/srt/layers/attention/triton_ops/decode_attention.py +53 -59
- sglang/srt/layers/layernorm.py +1 -5
- sglang/srt/layers/moe/ep_moe/layer.py +1 -3
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json +200 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json +200 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json +200 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +178 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json +200 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +175 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +3 -11
- sglang/srt/layers/moe/fused_moe_triton/layer.py +1 -3
- sglang/srt/layers/moe/topk.py +4 -0
- sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/fp8_kernel.py +140 -2
- sglang/srt/layers/rotary_embedding.py +1 -3
- sglang/srt/layers/sampler.py +4 -4
- sglang/srt/lora/backend/__init__.py +8 -0
- sglang/srt/lora/backend/base_backend.py +95 -0
- sglang/srt/lora/backend/flashinfer_backend.py +91 -0
- sglang/srt/lora/backend/triton_backend.py +61 -0
- sglang/srt/lora/lora.py +127 -112
- sglang/srt/lora/lora_manager.py +50 -18
- sglang/srt/lora/triton_ops/__init__.py +5 -0
- sglang/srt/lora/triton_ops/qkv_lora_b.py +182 -0
- sglang/srt/lora/triton_ops/sgemm_lora_a.py +143 -0
- sglang/srt/lora/triton_ops/sgemm_lora_b.py +159 -0
- sglang/srt/model_executor/cuda_graph_runner.py +77 -80
- sglang/srt/model_executor/forward_batch_info.py +58 -59
- sglang/srt/model_executor/model_runner.py +2 -2
- sglang/srt/models/qwen2_vl.py +1 -1
- sglang/srt/server_args.py +13 -2
- sglang/srt/speculative/build_eagle_tree.py +4 -2
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +213 -0
- sglang/srt/speculative/eagle_utils.py +361 -372
- sglang/srt/speculative/eagle_worker.py +177 -45
- sglang/srt/utils.py +7 -0
- sglang/test/runners.py +2 -0
- sglang/version.py +1 -1
- {sglang-0.4.2.post1.dist-info → sglang-0.4.2.post2.dist-info}/METADATA +15 -6
- {sglang-0.4.2.post1.dist-info → sglang-0.4.2.post2.dist-info}/RECORD +72 -33
- sglang/srt/layers/custom_op_util.py +0 -25
- {sglang-0.4.2.post1.dist-info → sglang-0.4.2.post2.dist-info}/LICENSE +0 -0
- {sglang-0.4.2.post1.dist-info → sglang-0.4.2.post2.dist-info}/WHEEL +0 -0
- {sglang-0.4.2.post1.dist-info → sglang-0.4.2.post2.dist-info}/top_level.txt +0 -0
@@ -10,7 +10,7 @@ sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
|
|
10
10
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
11
11
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
12
12
|
sglang/utils.py,sha256=7HpOrPBhMivWH719m7Dy1rjrAXOAsnqelpwNBBbvjqs,13319
|
13
|
-
sglang/version.py,sha256=
|
13
|
+
sglang/version.py,sha256=64ujEkLGOA9yAnhsrnI5zBOk5lJIP4Z-b7gpnc9vbUo,28
|
14
14
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
15
|
sglang/lang/chat_template.py,sha256=v4SyYViPHX3i3XT46F7vlARn4UaSiP3PBpTGtzO6uRY,17006
|
16
16
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
@@ -28,14 +28,15 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
|
|
28
28
|
sglang/srt/_custom_ops.py,sha256=7jL5BTcoS8PmR56y2Qsa3q8emI-tmrJuV4hLTwLVFBE,5040
|
29
29
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
30
30
|
sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
|
31
|
+
sglang/srt/custom_op.py,sha256=M5oqlgh32vAVeStFCruydTUfi_blGFJihVTnQBEOvwo,1134
|
31
32
|
sglang/srt/function_call_parser.py,sha256=HMqCCd-YQeyADV_gTCduF9gmw2k3bRAkoJYcFsK3w3c,19230
|
32
33
|
sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mvg7hxHY,7729
|
33
34
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
34
35
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
35
36
|
sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
|
36
|
-
sglang/srt/server_args.py,sha256=
|
37
|
+
sglang/srt/server_args.py,sha256=A36_q9RKNueSfsBz-NAIGfa4YvCFHfhIfuHNS_4NzK0,40363
|
37
38
|
sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
|
38
|
-
sglang/srt/utils.py,sha256=
|
39
|
+
sglang/srt/utils.py,sha256=G8YDJ5FKRXiVDrsb30nulpogmbsVwTWx22YRT9T5inI,46529
|
39
40
|
sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
|
40
41
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
41
42
|
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
@@ -45,7 +46,7 @@ sglang/srt/configs/load_config.py,sha256=la2ezNRcUZs7qiTYta2KEXqZ0U4TcmWW3U0sjoH
|
|
45
46
|
sglang/srt/configs/model_config.py,sha256=sQIOfslBRzhOjucZdd8zE8nO9PEOc7zc6cZMbguQgoY,16876
|
46
47
|
sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
|
47
48
|
sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
|
48
|
-
sglang/srt/constrained/outlines_backend.py,sha256=
|
49
|
+
sglang/srt/constrained/outlines_backend.py,sha256=kNeJzr2NRqBy8AMiUlZUly0FmpR0raMB59QRhlfY_Tc,6970
|
49
50
|
sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
|
50
51
|
sglang/srt/constrained/xgrammar_backend.py,sha256=l-37tdrPsp7xnxZpY8_0W1DnZSiBAH9e-BcwiAO8b0g,5048
|
51
52
|
sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
|
@@ -60,39 +61,38 @@ sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--Zq
|
|
60
61
|
sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
|
61
62
|
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
|
62
63
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
63
|
-
sglang/srt/entrypoints/engine.py,sha256=
|
64
|
+
sglang/srt/entrypoints/engine.py,sha256=hYLhAJpWMrz_8RuXuYIhFUBbYp4JFEQn8YwQDcnFz-k,16853
|
64
65
|
sglang/srt/entrypoints/http_server.py,sha256=TJlekPuw01_AvfAhDUdD-DaxCmmW_uH_rWL2CNv2OGE,19545
|
65
|
-
sglang/srt/layers/activation.py,sha256=
|
66
|
-
sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
|
66
|
+
sglang/srt/layers/activation.py,sha256=f9KGwGi2znUx5SFKH_vO8htpBkfQ550VZZIycFDfPlk,5602
|
67
67
|
sglang/srt/layers/dp_attention.py,sha256=LLUMHIdphhQy1rNR52uwIFl85oDFPAsogMwYF3d83PU,1910
|
68
|
-
sglang/srt/layers/layernorm.py,sha256=
|
68
|
+
sglang/srt/layers/layernorm.py,sha256=2_9XCR2l18c3jMrbTgfQ350C1gc9Ua_z965KRxJHJeQ,3858
|
69
69
|
sglang/srt/layers/linear.py,sha256=u9ND-ZRv_uq8I3bxqbN11bus-kr5DHqdo67ksFA0EcU,50724
|
70
70
|
sglang/srt/layers/logits_processor.py,sha256=_3TZNUbvjmw63ywBv6V6WU87G1TErMaXGa7-VNExM1E,12190
|
71
71
|
sglang/srt/layers/parameter.py,sha256=sX6aB69qbD6jRqQeOfXqK_ueyyZlXCeC0AlglbsRPcM,14901
|
72
72
|
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
73
73
|
sglang/srt/layers/radix_attention.py,sha256=tPjJA3P9kuFBk2QWFTgOI8UbVUFLVDZgFaQWuokx894,2234
|
74
|
-
sglang/srt/layers/rotary_embedding.py,sha256=
|
75
|
-
sglang/srt/layers/sampler.py,sha256=
|
74
|
+
sglang/srt/layers/rotary_embedding.py,sha256=ek92uQVjR9nQW1OHE9C5SwLinoCCk1G68jUSImo65HI,43980
|
75
|
+
sglang/srt/layers/sampler.py,sha256=FIkh6sh91Fh5R8QJ6x66bJ8Y-xl5EfT4XVPXGXJ1l7I,9937
|
76
76
|
sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
|
77
77
|
sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
|
78
78
|
sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
|
79
79
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jHKHZhhgHuV0Fla_BPzzoo9JfT4,9231
|
80
|
-
sglang/srt/layers/attention/flashinfer_backend.py,sha256=
|
80
|
+
sglang/srt/layers/attention/flashinfer_backend.py,sha256=9BJEAQ5IcSMGvPfa6_D3cP9Gbo2XQ5GHBnF7cw2Rsng,42933
|
81
81
|
sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
|
82
|
-
sglang/srt/layers/attention/triton_backend.py,sha256=
|
82
|
+
sglang/srt/layers/attention/triton_backend.py,sha256=PZU496wPzyRopA600riR5sxz-gIz8u9TYz0MzmMuX5Y,8858
|
83
83
|
sglang/srt/layers/attention/vision.py,sha256=zLjKmzUlkgq1RFcP3b4EPArOAKovoaDLgYfM5SyB2wM,13181
|
84
|
-
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=
|
84
|
+
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=tcUAdacBWTpZmro7vZeRPasfwRWFlCR4bxfGpFOYgZ8,17831
|
85
85
|
sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
|
86
86
|
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
|
87
87
|
sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
|
88
88
|
sglang/srt/layers/moe/fused_moe_native.py,sha256=OEWpM93X5tJG4-rwz5qmdpTzEUR73zun29YRV3bZglY,4269
|
89
|
-
sglang/srt/layers/moe/topk.py,sha256=
|
89
|
+
sglang/srt/layers/moe/topk.py,sha256=6A4W1ztlV2dQvkXcPJvFvAg0QEhE58Q7eE7iw8N36J4,7230
|
90
90
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
91
91
|
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
|
92
|
-
sglang/srt/layers/moe/ep_moe/layer.py,sha256=
|
92
|
+
sglang/srt/layers/moe/ep_moe/layer.py,sha256=aS8t1XUvlTnO9IQaxGjW5bOXP4FrJDXzymEIvlIDMro,22603
|
93
93
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
|
94
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
95
|
-
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256
|
94
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=_uUn0EKf_trPyKP4jfCytKKgnC3ziUM_X9L_PbaXhbE,37241
|
95
|
+
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ruK09nIJqGHKNa8CJsws,22691
|
96
96
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
97
97
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
|
98
98
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
|
@@ -125,6 +125,8 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=bofJhiDnRNqD2D20QV7CPNf2S
|
|
125
125
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
|
126
126
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
|
127
127
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yN6aItfVswTn6ggd1NJb7MmO9evCet5cRIiKeHcb2r8,3733
|
128
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yN6aItfVswTn6ggd1NJb7MmO9evCet5cRIiKeHcb2r8,3733
|
129
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=IMTKnPRjhSptf7smIkpqmMjSML9SQ7I8CpkbR3Inzqk,3258
|
128
130
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
|
129
131
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
|
130
132
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=uv-RwTNZT2n264dLo4eWxUpB3g7QqUyf2MFEGiRvoqQ,3251
|
@@ -147,10 +149,12 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=bofJhiDnRNqD2D20QV7CPNf2S
|
|
147
149
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=eD3Y9sOwHFcgVdOfya8KxPhvLx_b4whfEWm4d8Y2HW8,3268
|
148
150
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200.json",sha256=KIfpZydSl31FOEqq0EBfxTyWRj1QTDwTjkPHFjNO3_A,3253
|
149
151
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=OnadAdmDbX17Ni9VPrNXYSsxYhbtBeniCxxhhb0UmUk,4733
|
152
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json",sha256=OnadAdmDbX17Ni9VPrNXYSsxYhbtBeniCxxhhb0UmUk,4733
|
150
153
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082
|
151
154
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=LD4Z5MRR5Ivi4bYB5hMgymtvmFyVJwq6gmehA7fzecc,3271
|
152
155
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200.json",sha256=GLIH4egg-pE-NWU5XqKuJCoRXciHN6GSc3NaE4PaeYg,3261
|
153
156
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=bKsYVJm-IvWHWpxUG-lMPkyNz0nQpDb4UEIv895c9JI,4730
|
157
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json",sha256=bKsYVJm-IvWHWpxUG-lMPkyNz0nQpDb4UEIv895c9JI,4730
|
154
158
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
|
155
159
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
|
156
160
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255
|
@@ -162,6 +166,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=bofJhiDnRNqD2D20QV7CPNf2S
|
|
162
166
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=4O4VeMpgFNrqWyWqWgYgcYAgBQnOlAXvt26CRSXK-sY,3270
|
163
167
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200.json",sha256=qfjbXqbl902TuiyzzomUy2sMvs-Dud8ZphDRY5WIPBM,3260
|
164
168
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=_bw1_oads8tz51i4RVQUAjNi8r3b2Q2jPbi50TLFzlY,4732
|
169
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json",sha256=_bw1_oads8tz51i4RVQUAjNi8r3b2Q2jPbi50TLFzlY,4732
|
165
170
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
|
166
171
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
|
167
172
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=4Q_-yITMfijOMoguUM2n96clARh-DUFsS-4oW_a3Jpc,3252
|
@@ -171,56 +176,73 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=bofJhiDnRNqD2D20QV7CPNf2S
|
|
171
176
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200.json",sha256=Pi2coJlJlpgqXiPRd77B_eCmmi7sCdBuoSGK1RA5YO8,3258
|
172
177
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=p2qlRhTt7owWB8keEmoCrPZpo39IAxsKnULFQ7R38SI,3873
|
173
178
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=AKIX43JVc26ERb862pNOMEfGhsgyk1OGa42EptAfG1s,4409
|
179
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=AKIX43JVc26ERb862pNOMEfGhsgyk1OGa42EptAfG1s,4409
|
174
180
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
|
175
181
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262
|
176
182
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259
|
177
183
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=rR8b-OuQ3watb8b2zuNlxKDSZpzlAagm9nb-FdKkt7s,3270
|
178
184
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200.json",sha256=8G_QqV_DhvZ6xSavMSpeE6qcXPVpsVjEtJabydybKqY,3263
|
179
185
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=54KpHTMGt_zDQHqbdopuVHPpiI44ZsN_5LBUBZ_woY4,4733
|
186
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json",sha256=54KpHTMGt_zDQHqbdopuVHPpiI44ZsN_5LBUBZ_woY4,4733
|
180
187
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
|
181
188
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265
|
182
189
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
|
183
190
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=3YQakSmUKhpw1KO7Hn-tEc-yyD1fEj01_6JlSYnrrlI,3274
|
184
191
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200.json",sha256=W2ka_U8pzwjzX62NEGKXR32uuSR_zfHD1XjXYf5bgBs,3262
|
185
192
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
|
193
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
|
186
194
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
|
187
195
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
|
188
196
|
sglang/srt/layers/quantization/__init__.py,sha256=_Sba1KQnmZNKGDKM1MfBs2T3uDqOHfeW6IHO2mTUvfs,4471
|
189
197
|
sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
|
190
198
|
sglang/srt/layers/quantization/fp8.py,sha256=ibttPVCUsCQ0LXy7FUb8wnzqGcGZQXQLqwCB4a2fai4,35160
|
191
|
-
sglang/srt/layers/quantization/fp8_kernel.py,sha256=
|
199
|
+
sglang/srt/layers/quantization/fp8_kernel.py,sha256=gm_mDHeBswggsaNMEo0niayx9DS2mGJkLkrnnOYQnlU,15709
|
192
200
|
sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
|
193
201
|
sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
|
194
202
|
sglang/srt/layers/quantization/modelopt_quant.py,sha256=_VdVz77dTP-IczPeFrdH6Ttro2D26BZvMlZkCKWj_5o,6200
|
195
203
|
sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
|
196
204
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
197
205
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
|
206
|
+
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
|
198
207
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
199
208
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
200
209
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
|
201
210
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
|
202
211
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
|
203
212
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
|
213
|
+
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
|
204
214
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
|
205
215
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
|
206
216
|
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
|
207
217
|
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
218
|
+
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
219
|
+
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262
|
208
220
|
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247
|
221
|
+
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257
|
209
222
|
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244
|
210
223
|
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
|
224
|
+
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266
|
211
225
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
|
212
226
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
|
213
227
|
"sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
|
214
228
|
"sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
|
215
229
|
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
|
230
|
+
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
|
231
|
+
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252
|
216
232
|
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254
|
217
233
|
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252
|
218
234
|
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
|
235
|
+
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
|
236
|
+
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254
|
219
237
|
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242
|
220
238
|
"sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251
|
221
239
|
"sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
|
240
|
+
"sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
|
241
|
+
"sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254
|
222
242
|
"sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244
|
223
243
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
|
244
|
+
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
|
245
|
+
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254
|
224
246
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
|
225
247
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
|
226
248
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
|
@@ -228,21 +250,37 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
228
250
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
|
229
251
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
|
230
252
|
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
|
253
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
|
231
254
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
|
232
255
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
|
256
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
|
233
257
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
|
234
258
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
|
235
259
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
260
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
261
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256
|
236
262
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254
|
237
263
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253
|
238
264
|
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
|
265
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
|
266
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264
|
239
267
|
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
|
240
268
|
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
|
241
269
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
|
270
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
|
271
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
|
242
272
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
243
|
-
sglang/srt/lora/lora.py,sha256=
|
273
|
+
sglang/srt/lora/lora.py,sha256=71sVFPB23THA4O06v2_G91vaY3MhAchsti-LErycJ6g,14869
|
244
274
|
sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
|
245
|
-
sglang/srt/lora/lora_manager.py,sha256=
|
275
|
+
sglang/srt/lora/lora_manager.py,sha256=fa-Sy4b3k_YzJJY5XQgxCX5AycnraF-f600jDBaXKzo,13991
|
276
|
+
sglang/srt/lora/backend/__init__.py,sha256=ul5yTkHrwDTv8RZXu_Miw5_XpBlxqKeGmLfUT6Ph7EE,211
|
277
|
+
sglang/srt/lora/backend/base_backend.py,sha256=MZfUhIwujwDyN2hFoVMB3N_gJokel6aU4Scc8fv5_L0,3575
|
278
|
+
sglang/srt/lora/backend/flashinfer_backend.py,sha256=qXqtt4w_-Gqkk-j27CHHI1Jeu0gb8jr0ngI54eCNdqI,2845
|
279
|
+
sglang/srt/lora/backend/triton_backend.py,sha256=MzVZYtPXwqS8DfeFMIpI0xNI90oVHcasaT3ppea9Vns,1722
|
280
|
+
sglang/srt/lora/triton_ops/__init__.py,sha256=F4NrXZ4bSD39hkPVkby6OR-0Ap7dOOUsB0BjqjGMjy4,195
|
281
|
+
sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=gFO53wGm93GpDmFaJgqyD1tgmrDllZ2bCBhGczZg0_4,5664
|
282
|
+
sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=GvPFXGXC2qLh4djlIIyAvobo9VLJcSEtyjcaveQh0Cw,3981
|
283
|
+
sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=pZxVJOMaf0OwMUJAw37KA4ZcoQb8gLpFGbYPirX8Oqk,4298
|
246
284
|
sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4wL9zoG8Xz8,10909
|
247
285
|
sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
|
248
286
|
sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
|
@@ -264,9 +302,9 @@ sglang/srt/mem_cache/memory_pool.py,sha256=9ud97u1cXnN6O0qlR8tv8woN_20gqisTV6aBg
|
|
264
302
|
sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll53MSLeU,11754
|
265
303
|
sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
|
266
304
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
267
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
268
|
-
sglang/srt/model_executor/forward_batch_info.py,sha256=
|
269
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
305
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=Tv9ktSMaHNcAUUU2ERmxouFc6c94qbNMGt7_usQCD3w,18202
|
306
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=t1RlBgoeS-_Ikl28Xjvt-aouh1nNUc3eLM4iGY4_QqY,14988
|
307
|
+
sglang/srt/model_executor/model_runner.py,sha256=5ET8CXwSrzIKb_rDW-6S0p7tv-iCPoEwZk3dESwPdF8,33090
|
270
308
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
271
309
|
sglang/srt/model_loader/loader.py,sha256=2d9fJNxC3Y6YWmQX4nVOB-b9Glc43ztlkJYJFX1_kxk,46811
|
272
310
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
@@ -309,7 +347,7 @@ sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,990
|
|
309
347
|
sglang/srt/models/qwen2.py,sha256=igq-a61CQgH26xnim6c3yeWUCHiN_Nboxg4iu7oy7bo,15072
|
310
348
|
sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
|
311
349
|
sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
|
312
|
-
sglang/srt/models/qwen2_vl.py,sha256=
|
350
|
+
sglang/srt/models/qwen2_vl.py,sha256=d8jCZyoJ6sMZ8-Pw9bHb3VbwiZlbFzJvffA4yYBRj0U,23466
|
313
351
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
314
352
|
sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
|
315
353
|
sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
|
@@ -327,14 +365,15 @@ sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD6
|
|
327
365
|
sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
|
328
366
|
sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
|
329
367
|
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=l1DyU8kC8n_F4Z6Jd8mZKfF23buuLZ5dWuVfyqDWkUI,2968
|
330
|
-
sglang/srt/speculative/build_eagle_tree.py,sha256=
|
331
|
-
sglang/srt/speculative/
|
332
|
-
sglang/srt/speculative/
|
368
|
+
sglang/srt/speculative/build_eagle_tree.py,sha256=GlHDIbaZInhf1LYuWVmkVCvPcd6sKDxeXafLTai7Zpw,9892
|
369
|
+
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=5ZCy6ndPA2p95xDgo2kXWD3zCtVaq4q5X0HBpAbB3Xs,7929
|
370
|
+
sglang/srt/speculative/eagle_utils.py,sha256=DUANfRddoQ4MqNuQW3Uz21qWRaetYf4prVcljZR0tyI,22945
|
371
|
+
sglang/srt/speculative/eagle_worker.py,sha256=4oROLwUBJIwEHNHNEfvsy74DqLQLVc4KfjdR-MrB1OM,12038
|
333
372
|
sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
|
334
373
|
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
335
374
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
336
375
|
sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
|
337
|
-
sglang/test/runners.py,sha256=
|
376
|
+
sglang/test/runners.py,sha256=f235MQA5P0Uw4mXSHzDAFAlxHSqFi1cL5SdA49kPyCA,15017
|
338
377
|
sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
|
339
378
|
sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
|
340
379
|
sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
|
@@ -347,8 +386,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
|
|
347
386
|
sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
|
348
387
|
sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
|
349
388
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
350
|
-
sglang-0.4.2.
|
351
|
-
sglang-0.4.2.
|
352
|
-
sglang-0.4.2.
|
353
|
-
sglang-0.4.2.
|
354
|
-
sglang-0.4.2.
|
389
|
+
sglang-0.4.2.post2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
390
|
+
sglang-0.4.2.post2.dist-info/METADATA,sha256=-Cl1_HFESAZxOXkBnaqDshP2M93b_4FWYGVh-1Yyw3s,23763
|
391
|
+
sglang-0.4.2.post2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
392
|
+
sglang-0.4.2.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
393
|
+
sglang-0.4.2.post2.dist-info/RECORD,,
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# Copyright 2023-2024 SGLang Team
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
3
|
-
# you may not use this file except in compliance with the License.
|
4
|
-
# You may obtain a copy of the License at
|
5
|
-
#
|
6
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
7
|
-
#
|
8
|
-
# Unless required by applicable law or agreed to in writing, software
|
9
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
10
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
11
|
-
# See the License for the specific language governing permissions and
|
12
|
-
# limitations under the License.
|
13
|
-
# ==============================================================================
|
14
|
-
|
15
|
-
from vllm.model_executor.custom_op import CustomOp
|
16
|
-
|
17
|
-
|
18
|
-
def register_custom_op(op_name):
|
19
|
-
def decorator(cls):
|
20
|
-
if hasattr(CustomOp, "register"):
|
21
|
-
return CustomOp.register(op_name)(cls)
|
22
|
-
else:
|
23
|
-
return cls
|
24
|
-
|
25
|
-
return decorator
|
File without changes
|
File without changes
|
File without changes
|