sglang 0.4.2.post3__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. sglang/check_env.py +1 -0
  2. sglang/global_config.py +2 -0
  3. sglang/srt/constrained/outlines_backend.py +4 -1
  4. sglang/srt/entrypoints/engine.py +2 -2
  5. sglang/srt/layers/attention/flashinfer_backend.py +265 -147
  6. sglang/srt/layers/attention/triton_backend.py +358 -72
  7. sglang/srt/layers/attention/triton_ops/extend_attention.py +4 -4
  8. sglang/srt/layers/linear.py +12 -5
  9. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +2 -2
  10. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  11. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +2 -2
  12. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json +200 -0
  13. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json +200 -0
  14. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json +200 -0
  15. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +178 -0
  16. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json +200 -0
  17. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +175 -0
  18. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +27 -5
  19. sglang/srt/layers/moe/fused_moe_triton/layer.py +2 -0
  20. sglang/srt/layers/moe/topk.py +1 -1
  21. sglang/srt/layers/quantization/__init__.py +51 -5
  22. sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  23. sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  24. sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +30 -30
  25. sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  26. sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  27. sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  28. sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  29. sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  30. sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  31. sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +29 -29
  32. sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  33. sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  34. sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +33 -33
  35. sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  36. sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +31 -31
  37. sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  38. sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +27 -27
  39. sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  40. sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +31 -31
  41. sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  42. sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  43. sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  44. sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  45. sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  46. sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  47. sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  48. sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +24 -24
  49. sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  50. sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +30 -30
  51. sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
  52. sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +42 -42
  53. sglang/srt/layers/quantization/fp8_kernel.py +123 -17
  54. sglang/srt/layers/quantization/fp8_utils.py +33 -4
  55. sglang/srt/lora/backend/__init__.py +25 -5
  56. sglang/srt/lora/backend/base_backend.py +31 -9
  57. sglang/srt/lora/backend/flashinfer_backend.py +41 -4
  58. sglang/srt/lora/backend/triton_backend.py +34 -4
  59. sglang/srt/lora/layers.py +293 -0
  60. sglang/srt/lora/lora.py +101 -326
  61. sglang/srt/lora/lora_manager.py +101 -269
  62. sglang/srt/lora/mem_pool.py +174 -0
  63. sglang/srt/lora/triton_ops/__init__.py +7 -1
  64. sglang/srt/lora/triton_ops/gate_up_lora_b.py +170 -0
  65. sglang/srt/lora/triton_ops/qkv_lora_b.py +5 -5
  66. sglang/srt/lora/triton_ops/sgemm_lora_a.py +2 -2
  67. sglang/srt/lora/triton_ops/sgemm_lora_b.py +2 -2
  68. sglang/srt/lora/utils.py +141 -0
  69. sglang/srt/managers/detokenizer_manager.py +1 -0
  70. sglang/srt/managers/io_struct.py +4 -0
  71. sglang/srt/managers/schedule_batch.py +16 -3
  72. sglang/srt/managers/scheduler.py +29 -0
  73. sglang/srt/managers/tokenizer_manager.py +6 -0
  74. sglang/srt/managers/tp_worker_overlap_thread.py +4 -0
  75. sglang/srt/model_executor/cuda_graph_runner.py +16 -1
  76. sglang/srt/model_executor/model_runner.py +12 -2
  77. sglang/srt/models/deepseek_v2.py +17 -7
  78. sglang/srt/server_args.py +20 -1
  79. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +1 -0
  80. sglang/srt/speculative/eagle_utils.py +64 -21
  81. sglang/srt/speculative/eagle_worker.py +29 -8
  82. sglang/srt/utils.py +7 -0
  83. sglang/version.py +1 -1
  84. {sglang-0.4.2.post3.dist-info → sglang-0.4.3.dist-info}/METADATA +6 -5
  85. {sglang-0.4.2.post3.dist-info → sglang-0.4.3.dist-info}/RECORD +88 -55
  86. {sglang-0.4.2.post3.dist-info → sglang-0.4.3.dist-info}/LICENSE +0 -0
  87. {sglang-0.4.2.post3.dist-info → sglang-0.4.3.dist-info}/WHEEL +0 -0
  88. {sglang-0.4.2.post3.dist-info → sglang-0.4.3.dist-info}/top_level.txt +0 -0
@@ -5,12 +5,12 @@ sglang/bench_offline_throughput.py,sha256=vIoF87HIpezB1x-xWzUl6SdXi88Fza8g4hDU7G
5
5
  sglang/bench_one_batch.py,sha256=d-LuRHEyDZjh180OCN5fqTjr8Zusk3zc0vhoJ33x0B0,17905
6
6
  sglang/bench_one_batch_server.py,sha256=iu73SsvYwnuRktYZDz1P6psMiRx8MbEbF5sbsYJdzYg,5962
7
7
  sglang/bench_serving.py,sha256=jYU3rYIDkzpYhjSpJw_IkEs_UNQfouNW4phs3z5TObc,54303
8
- sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
9
- sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
8
+ sglang/check_env.py,sha256=lDVA3ybt1wOE33HIMpkkU7zGRgLWez1_ifRRJ8qxbtw,8445
9
+ sglang/global_config.py,sha256=crt5cernXnDa1iQ8kGOq_ScTFclRlTQbJ-atFHM7I5I,1330
10
10
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
11
11
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
12
12
  sglang/utils.py,sha256=7HpOrPBhMivWH719m7Dy1rjrAXOAsnqelpwNBBbvjqs,13319
13
- sglang/version.py,sha256=08dwZ-8Pb-Ir0QXBY3R8hBlzHyVuy4icqVMBMJri3oM,28
13
+ sglang/version.py,sha256=Nyg0pmk5ea9-SLCAFEIF96ByFx4-TJFtrqYPN-Zn6g4,22
14
14
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  sglang/lang/chat_template.py,sha256=v4SyYViPHX3i3XT46F7vlARn4UaSiP3PBpTGtzO6uRY,17006
16
16
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -34,9 +34,9 @@ sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mv
34
34
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
35
35
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
36
36
  sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
37
- sglang/srt/server_args.py,sha256=A36_q9RKNueSfsBz-NAIGfa4YvCFHfhIfuHNS_4NzK0,40363
37
+ sglang/srt/server_args.py,sha256=97YAjIbttBjty9Rv9CQ-yGERprFRSu2Jq0zX8Yx-QH8,41018
38
38
  sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
39
- sglang/srt/utils.py,sha256=G8YDJ5FKRXiVDrsb30nulpogmbsVwTWx22YRT9T5inI,46529
39
+ sglang/srt/utils.py,sha256=RVU-OORgeVQICMPzj17KHxbDdSYGOKFBnNR4dZejP9A,46780
40
40
  sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
41
41
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
42
42
  sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
@@ -46,7 +46,7 @@ sglang/srt/configs/load_config.py,sha256=la2ezNRcUZs7qiTYta2KEXqZ0U4TcmWW3U0sjoH
46
46
  sglang/srt/configs/model_config.py,sha256=sQIOfslBRzhOjucZdd8zE8nO9PEOc7zc6cZMbguQgoY,16876
47
47
  sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
48
48
  sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
49
- sglang/srt/constrained/outlines_backend.py,sha256=kNeJzr2NRqBy8AMiUlZUly0FmpR0raMB59QRhlfY_Tc,6970
49
+ sglang/srt/constrained/outlines_backend.py,sha256=yPYgz44n-rSCStGGkS1lGazFiQzN7gqwSvpJ2YG0co4,7081
50
50
  sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
51
51
  sglang/srt/constrained/xgrammar_backend.py,sha256=l-37tdrPsp7xnxZpY8_0W1DnZSiBAH9e-BcwiAO8b0g,5048
52
52
  sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
@@ -61,12 +61,12 @@ sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--Zq
61
61
  sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
62
62
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
63
63
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
64
- sglang/srt/entrypoints/engine.py,sha256=hYLhAJpWMrz_8RuXuYIhFUBbYp4JFEQn8YwQDcnFz-k,16853
64
+ sglang/srt/entrypoints/engine.py,sha256=3JL7aj0K5UHCBsqCOG2MTLAv-8IpwtN2K-3_xB6fNqk,16888
65
65
  sglang/srt/entrypoints/http_server.py,sha256=TJlekPuw01_AvfAhDUdD-DaxCmmW_uH_rWL2CNv2OGE,19545
66
66
  sglang/srt/layers/activation.py,sha256=f9KGwGi2znUx5SFKH_vO8htpBkfQ550VZZIycFDfPlk,5602
67
67
  sglang/srt/layers/dp_attention.py,sha256=LLUMHIdphhQy1rNR52uwIFl85oDFPAsogMwYF3d83PU,1910
68
68
  sglang/srt/layers/layernorm.py,sha256=2_9XCR2l18c3jMrbTgfQ350C1gc9Ua_z965KRxJHJeQ,3858
69
- sglang/srt/layers/linear.py,sha256=u9ND-ZRv_uq8I3bxqbN11bus-kr5DHqdo67ksFA0EcU,50724
69
+ sglang/srt/layers/linear.py,sha256=gtDbHu7ewk7Gta9MECGMT-vtldlQkyY3hsv2MVpYN4c,51000
70
70
  sglang/srt/layers/logits_processor.py,sha256=_3TZNUbvjmw63ywBv6V6WU87G1TErMaXGa7-VNExM1E,12190
71
71
  sglang/srt/layers/parameter.py,sha256=sX6aB69qbD6jRqQeOfXqK_ueyyZlXCeC0AlglbsRPcM,14901
72
72
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
@@ -77,22 +77,22 @@ sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSa
77
77
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
78
78
  sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
79
79
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=4mVyFPfZxPTwkQHGNCfI_4hQ8CbsWXJfxz-IQW77gAc,9143
80
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=9BJEAQ5IcSMGvPfa6_D3cP9Gbo2XQ5GHBnF7cw2Rsng,42933
80
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=8bEnIXFXBB8lv0ruH_paqT8uLIHsKYKYhQ9ytzDcsYI,47681
81
81
  sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
82
- sglang/srt/layers/attention/triton_backend.py,sha256=mtLs768rhtCF_BVAV_rmYac0U4R1_HHc-9ic4JratsY,10100
82
+ sglang/srt/layers/attention/triton_backend.py,sha256=mbYaYKHYrUyL2zEXrPtDRIcvVNe6L-bmcdLhKF92V-0,21292
83
83
  sglang/srt/layers/attention/vision.py,sha256=zLjKmzUlkgq1RFcP3b4EPArOAKovoaDLgYfM5SyB2wM,13181
84
84
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=tcUAdacBWTpZmro7vZeRPasfwRWFlCR4bxfGpFOYgZ8,17831
85
85
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=ztLWKeW-260EiIw3kCAbtUTUHHxAICz2mVxZJFes4oI,31167
86
- sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=R6QgrcBf6XuLzQ1jamrILNypaPi3ynkMPTfjae0d3JA,12695
86
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=U46NHqzRJDKAEfGIgsqrh6TSp8vu1GGEA2LPMwcwsVw,12691
87
87
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
88
88
  sglang/srt/layers/moe/fused_moe_native.py,sha256=OEWpM93X5tJG4-rwz5qmdpTzEUR73zun29YRV3bZglY,4269
89
- sglang/srt/layers/moe/topk.py,sha256=6A4W1ztlV2dQvkXcPJvFvAg0QEhE58Q7eE7iw8N36J4,7230
89
+ sglang/srt/layers/moe/topk.py,sha256=uegUdoIb9OVi3_CllGN350DlVYuDJt1kjQZKxNdwKgA,7244
90
90
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
91
  sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
92
92
  sglang/srt/layers/moe/ep_moe/layer.py,sha256=aS8t1XUvlTnO9IQaxGjW5bOXP4FrJDXzymEIvlIDMro,22603
93
93
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
94
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=tWV490Ao5vIasPDBBY9ktuAZdWlONnnv3uPCifcTfpI,37241
95
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ruK09nIJqGHKNa8CJsws,22691
94
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=Hyd7dDTaoC5SLdNDH9n6ot3Ibpv7zva2ZG2pRkxBRdc,37964
95
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=RWBo3j5AzZls5eD2eaejetSfMz1yQg2_Tmv_i59Ml6Q,22836
96
96
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
97
97
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
98
98
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -124,8 +124,9 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ru
124
124
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=kklgf2qLI5CQYiJJ5e9Gxx2gAfGxcyMDYpdJnIXPV8E,2748
125
125
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
126
126
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
127
- "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yN6aItfVswTn6ggd1NJb7MmO9evCet5cRIiKeHcb2r8,3733
128
- "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yN6aItfVswTn6ggd1NJb7MmO9evCet5cRIiKeHcb2r8,3733
127
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
128
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
129
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
129
130
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=IMTKnPRjhSptf7smIkpqmMjSML9SQ7I8CpkbR3Inzqk,3258
130
131
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
131
132
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
@@ -149,11 +150,13 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ru
149
150
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=eD3Y9sOwHFcgVdOfya8KxPhvLx_b4whfEWm4d8Y2HW8,3268
150
151
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200.json",sha256=KIfpZydSl31FOEqq0EBfxTyWRj1QTDwTjkPHFjNO3_A,3253
151
152
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=OnadAdmDbX17Ni9VPrNXYSsxYhbtBeniCxxhhb0UmUk,4733
153
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json",sha256=OnadAdmDbX17Ni9VPrNXYSsxYhbtBeniCxxhhb0UmUk,4733
152
154
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json",sha256=OnadAdmDbX17Ni9VPrNXYSsxYhbtBeniCxxhhb0UmUk,4733
153
155
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082
154
156
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=LD4Z5MRR5Ivi4bYB5hMgymtvmFyVJwq6gmehA7fzecc,3271
155
157
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200.json",sha256=GLIH4egg-pE-NWU5XqKuJCoRXciHN6GSc3NaE4PaeYg,3261
156
158
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=bKsYVJm-IvWHWpxUG-lMPkyNz0nQpDb4UEIv895c9JI,4730
159
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json",sha256=bKsYVJm-IvWHWpxUG-lMPkyNz0nQpDb4UEIv895c9JI,4730
157
160
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json",sha256=bKsYVJm-IvWHWpxUG-lMPkyNz0nQpDb4UEIv895c9JI,4730
158
161
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
159
162
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
@@ -166,6 +169,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ru
166
169
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=4O4VeMpgFNrqWyWqWgYgcYAgBQnOlAXvt26CRSXK-sY,3270
167
170
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200.json",sha256=qfjbXqbl902TuiyzzomUy2sMvs-Dud8ZphDRY5WIPBM,3260
168
171
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=_bw1_oads8tz51i4RVQUAjNi8r3b2Q2jPbi50TLFzlY,4732
172
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json",sha256=_bw1_oads8tz51i4RVQUAjNi8r3b2Q2jPbi50TLFzlY,4732
169
173
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json",sha256=_bw1_oads8tz51i4RVQUAjNi8r3b2Q2jPbi50TLFzlY,4732
170
174
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
171
175
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
@@ -176,6 +180,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ru
176
180
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200.json",sha256=Pi2coJlJlpgqXiPRd77B_eCmmi7sCdBuoSGK1RA5YO8,3258
177
181
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=p2qlRhTt7owWB8keEmoCrPZpo39IAxsKnULFQ7R38SI,3873
178
182
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=AKIX43JVc26ERb862pNOMEfGhsgyk1OGa42EptAfG1s,4409
183
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=AKIX43JVc26ERb862pNOMEfGhsgyk1OGa42EptAfG1s,4409
179
184
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=AKIX43JVc26ERb862pNOMEfGhsgyk1OGa42EptAfG1s,4409
180
185
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
181
186
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262
@@ -183,6 +188,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ru
183
188
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=rR8b-OuQ3watb8b2zuNlxKDSZpzlAagm9nb-FdKkt7s,3270
184
189
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200.json",sha256=8G_QqV_DhvZ6xSavMSpeE6qcXPVpsVjEtJabydybKqY,3263
185
190
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=54KpHTMGt_zDQHqbdopuVHPpiI44ZsN_5LBUBZ_woY4,4733
191
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json",sha256=54KpHTMGt_zDQHqbdopuVHPpiI44ZsN_5LBUBZ_woY4,4733
186
192
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json",sha256=54KpHTMGt_zDQHqbdopuVHPpiI44ZsN_5LBUBZ_woY4,4733
187
193
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
188
194
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265
@@ -190,32 +196,41 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=-49WRpq9OtRZocQjW-YNcB_ru
190
196
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=3YQakSmUKhpw1KO7Hn-tEc-yyD1fEj01_6JlSYnrrlI,3274
191
197
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200.json",sha256=W2ka_U8pzwjzX62NEGKXR32uuSR_zfHD1XjXYf5bgBs,3262
192
198
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
199
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
193
200
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
194
201
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
195
202
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
196
- sglang/srt/layers/quantization/__init__.py,sha256=_Sba1KQnmZNKGDKM1MfBs2T3uDqOHfeW6IHO2mTUvfs,4471
203
+ sglang/srt/layers/quantization/__init__.py,sha256=MU8FV-uMl7XVrECUmyNFxAOobGza05I1FlDpvfoHP1o,5848
197
204
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
198
205
  sglang/srt/layers/quantization/fp8.py,sha256=ibttPVCUsCQ0LXy7FUb8wnzqGcGZQXQLqwCB4a2fai4,35160
199
- sglang/srt/layers/quantization/fp8_kernel.py,sha256=qlXXT9WO3TKxZv9r0rAdcDjO_jZYobtKnvTxmHDrfoo,16304
200
- sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
206
+ sglang/srt/layers/quantization/fp8_kernel.py,sha256=rbuoOhgpA8_sWE5Tm3C9m0YmLqUSSBKKunLiAnHhh6c,19300
207
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=_1uEpKdwq-GSKRMdSWpK-0z9koNctHfnuQr9wBIKRfw,5211
201
208
  sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
202
209
  sglang/srt/layers/quantization/modelopt_quant.py,sha256=_VdVz77dTP-IczPeFrdH6Ttro2D26BZvMlZkCKWj_5o,6200
203
210
  sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
204
211
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
212
+ "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=CPo1WRF0HgsQMPBkvpoImElQMrfwpJLhEvL86e6fkPU,3247
205
213
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
206
- "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
214
+ "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
215
+ "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9ya9f1Nt0g0RD-6sRRBZOaIPLSpSFZCz7jNvqTPrgFE,3732
207
216
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
217
+ "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=UZljnxxCSjwnZlX3OgKWZJGXCf5BWF_agEpNX8I4Zxc,3248
208
218
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
209
219
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
220
+ "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=3matoCzEe4aexwoe7YTmkjyE4NA8khWXjL5EySuNwzA,3254
210
221
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
211
222
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
223
+ "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9f8Ib4gLEFSfdNpO8IL8uiONImvqnlPbJrZ0HM3OB-o,3247
212
224
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
213
225
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
214
226
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
227
+ "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tG5_iVeRBHTgHX-liOf79nWRjj_lUZ-NQWTbBrBgORQ,3246
215
228
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
216
229
  "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
230
+ "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
217
231
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
218
- "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
232
+ "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
233
+ "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bSxTaptdcgj27mQGmdUmQtYTn4V_8EcmtRaVNigKjLA,3730
219
234
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262
220
235
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247
221
236
  "sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257
@@ -223,77 +238,95 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
223
238
  "sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
224
239
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266
225
240
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
241
+ "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=B0lo3SuoQXhBEnojH2TwpVeurvlKD8yI8kQrJ5ORhWU,3249
226
242
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
227
243
  "sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
228
244
  "sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
229
245
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
230
- "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
246
+ "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
247
+ "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xxNfGtHAlxDVX7PBnqExJN0UnYlA0UbaYoXUmuX0JsI,3739
231
248
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252
232
249
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254
233
250
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252
234
251
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
235
- "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
252
+ "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
253
+ "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=16Qk4BrbVQOdR9Et2T3SnLphQdvEwuuG3W3XCmAFa3s,3734
236
254
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254
237
255
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242
238
256
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251
239
257
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
240
- "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
258
+ "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
259
+ "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=m0E9TwfZjvtopYFhI92VSaqhiUZpjBu69kv56rKMEuQ,3729
241
260
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254
242
261
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244
243
262
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
244
- "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
263
+ "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
264
+ "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=A4uzWJTNhyNVh7ntOvUpT0TheaEVu_js0NCNdav8mTs,3730
245
265
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254
246
266
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
267
+ "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
247
268
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
248
269
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
270
+ "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=h32lCTFilLlyKbMeuJvNWG1v0yJJzNj93kwSvlrHfaY,3249
249
271
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
250
272
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
273
+ "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0jX-z2lTgVw7ABLmWsIsQdqW4EjmbXKRDHye_XPLCAE,3245
251
274
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
252
275
  "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
276
+ "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=_exM3wJ3FMmGHweBcH-8IxwZBzaOmPaF3ScMM6KDpiY,3253
253
277
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
254
278
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
279
+ "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ENRWYdUwI0ooHb6IwcHliupRWOPnw-7-WtxZB-qQGJI,3245
255
280
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
256
281
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
257
282
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
283
+ "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TdWuE2RIsIyr4Im24MuWK3XyiNtbhO_hAiAXDz5gNUk,3246
258
284
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
259
285
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
260
- "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
286
+ "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
287
+ "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sTvaJ0RiCaQem4F1z7oES6RVRJ2gKgBuccX13S1SqGc,3733
261
288
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256
262
289
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254
263
290
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253
264
291
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
265
- "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
292
+ "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
293
+ "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Rq-eMMK1deUJzmHQOH0E_pwQP7l-ZU-ECTP7Xwegavw,3736
266
294
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264
267
295
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
268
296
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
269
297
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
270
- "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
298
+ "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
299
+ "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=_RHvFcbtpsZBKxZte2-E3SUHtL1pwRtqwhSV4BMcyKo,3734
271
300
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
272
301
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
273
- sglang/srt/lora/lora.py,sha256=71sVFPB23THA4O06v2_G91vaY3MhAchsti-LErycJ6g,14869
302
+ sglang/srt/lora/layers.py,sha256=r34oprzwyE3SWPvaNkBvXWPtfa-0IY987_bjj36ySfw,9996
303
+ sglang/srt/lora/lora.py,sha256=_WrZxS6-sarwUPvumcReyKGrH6fSCd8-UsoX56aQJ4s,7293
274
304
  sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
275
- sglang/srt/lora/lora_manager.py,sha256=fa-Sy4b3k_YzJJY5XQgxCX5AycnraF-f600jDBaXKzo,13991
276
- sglang/srt/lora/backend/__init__.py,sha256=ul5yTkHrwDTv8RZXu_Miw5_XpBlxqKeGmLfUT6Ph7EE,211
277
- sglang/srt/lora/backend/base_backend.py,sha256=MZfUhIwujwDyN2hFoVMB3N_gJokel6aU4Scc8fv5_L0,3575
278
- sglang/srt/lora/backend/flashinfer_backend.py,sha256=qXqtt4w_-Gqkk-j27CHHI1Jeu0gb8jr0ngI54eCNdqI,2845
279
- sglang/srt/lora/backend/triton_backend.py,sha256=MzVZYtPXwqS8DfeFMIpI0xNI90oVHcasaT3ppea9Vns,1722
280
- sglang/srt/lora/triton_ops/__init__.py,sha256=F4NrXZ4bSD39hkPVkby6OR-0Ap7dOOUsB0BjqjGMjy4,195
281
- sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=gFO53wGm93GpDmFaJgqyD1tgmrDllZ2bCBhGczZg0_4,5664
282
- sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=GvPFXGXC2qLh4djlIIyAvobo9VLJcSEtyjcaveQh0Cw,3981
283
- sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=pZxVJOMaf0OwMUJAw37KA4ZcoQb8gLpFGbYPirX8Oqk,4298
305
+ sglang/srt/lora/lora_manager.py,sha256=-7ZWAL-E2mW3acXd9M0Z_slnTV1GfzGBD4RRv3cjafs,7851
306
+ sglang/srt/lora/mem_pool.py,sha256=eV_GXETxNODPVIAnTEeUUUVn0IVgguBR_mYFzIK-VHA,6835
307
+ sglang/srt/lora/utils.py,sha256=6i7Q1Y-1LLbRkeCMv_lKIzkTN0veUTLbc8wlHn7R-bA,4571
308
+ sglang/srt/lora/backend/__init__.py,sha256=98L_KRRnE3gcGcx7Lb6yjAEUUE_Yay3QszcQXdzYsDw,708
309
+ sglang/srt/lora/backend/base_backend.py,sha256=dldwA7vTWrB1ln1MwLYKNtMkBoAgD7OLSlWe9tL2lzk,4602
310
+ sglang/srt/lora/backend/flashinfer_backend.py,sha256=fXfkl7Cpw8ap2bCrgWdn_gEUzMXX1pNjNuiPw3kA76U,3984
311
+ sglang/srt/lora/backend/triton_backend.py,sha256=ZT5M30vj8x77Kltukpga4wk1sd8fT4n_FdsOMQBTMI0,2610
312
+ sglang/srt/lora/triton_ops/__init__.py,sha256=JGOYPIn1XbGcyJTbt8A0qoc02PYONSGNNjGkC8yJpAM,283
313
+ sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=qve4oNZHYUFk9ckmT2BVuDNMEvrN7Quu6RsS8Iz3uRQ,5066
314
+ sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=BmIcTZMnlSnie9rnMl4KvLpc4Njsk7_IppbUqitf9Xw,5738
315
+ sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=kv-AvJ_Bi3yWjGvFnSwXvP66iJvY9n9pEnJzJ9-DWzo,3982
316
+ sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=Ai5vPriT4OgACwK7xrpGgf5L1oaN9x0jwNKMChu3uI0,4299
284
317
  sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4wL9zoG8Xz8,10909
285
318
  sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
286
319
  sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
287
- sglang/srt/managers/detokenizer_manager.py,sha256=A-tZi9VPkrIAVteQItYUY-07V1rWmySFHNcVf8qAdPI,9578
320
+ sglang/srt/managers/detokenizer_manager.py,sha256=XC2INyykOgwmIrFEGc-zf6LGZ5mMt6oPZt1YRXW_cbY,9650
288
321
  sglang/srt/managers/image_processor.py,sha256=s1QH9cSzT_nnitc6idzFjuGDp-pDnMTpbVZoQfzdSXU,20671
289
- sglang/srt/managers/io_struct.py,sha256=1Z6MCVI1LN2lS_7e8WHkpVNT_LW62mE-jpZ2Jn_FAtE,18267
290
- sglang/srt/managers/schedule_batch.py,sha256=oP6ygJUOmo6PuXcA_wecRvOOa_WdpwmIyCPSgJy4qAc,48743
322
+ sglang/srt/managers/io_struct.py,sha256=9jhu794cc_BljFmVL6kQseTHGZNwEzONdlGEy_wjAcA,18357
323
+ sglang/srt/managers/schedule_batch.py,sha256=smqDrzohvA8j76CLgI53CvpduheW1m__26S0O8HcCf0,49187
291
324
  sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
292
- sglang/srt/managers/scheduler.py,sha256=akwBfBcNgpCXY1vp3FlD5-bOUMKfUBR5AC3XzSBRDYQ,70757
325
+ sglang/srt/managers/scheduler.py,sha256=w0FPjiU5MoyP58UdJoPBr-hf-WmlWPpqb-5TSJDJBLo,71908
293
326
  sglang/srt/managers/session_controller.py,sha256=WXRbtninVEVM0rQYiXFzOwsDph0TNj1L2sRCWQF0dSg,5571
294
- sglang/srt/managers/tokenizer_manager.py,sha256=TjhX0IeFCmk31PDmtVV7Ilc8rqI361XUf_p2KO3ai7s,38669
327
+ sglang/srt/managers/tokenizer_manager.py,sha256=q2FhMcjX9PYTWBz6E98AsE8INlF5I_YFfTWzkBWBjdk,38900
295
328
  sglang/srt/managers/tp_worker.py,sha256=OiHpFR9Hy1GpgLEkTDsykBiFuv1VKmkjQS58gQVPQIs,8126
296
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7p6zREndc4a9fmYfqW4iY9IYANxdoAioaf0hU92-8Ow,8893
329
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=e0Fz0Yrwl98azg_6vwLDE6b_5WMcb5llPIbxLfoYwAc,9090
297
330
  sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
298
331
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=qxgpSHm3qtMdab4U35Mr2BE9TQNjElrnrNMTwL_Osdo,1049
299
332
  sglang/srt/mem_cache/chunk_cache.py,sha256=hc_reKKvoI4r8xkgf4I4eIkwXWTJC2ZXaQWuODQZnx0,2572
@@ -302,9 +335,9 @@ sglang/srt/mem_cache/memory_pool.py,sha256=9ud97u1cXnN6O0qlR8tv8woN_20gqisTV6aBg
302
335
  sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll53MSLeU,11754
303
336
  sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
304
337
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
305
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=Tv9ktSMaHNcAUUU2ERmxouFc6c94qbNMGt7_usQCD3w,18202
338
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=hH646E_c4UlclGEawPDjg4KHgTUEk70WrPl6C7nnltM,18774
306
339
  sglang/srt/model_executor/forward_batch_info.py,sha256=t1RlBgoeS-_Ikl28Xjvt-aouh1nNUc3eLM4iGY4_QqY,14988
307
- sglang/srt/model_executor/model_runner.py,sha256=5ET8CXwSrzIKb_rDW-6S0p7tv-iCPoEwZk3dESwPdF8,33090
340
+ sglang/srt/model_executor/model_runner.py,sha256=uohQ2n2R1HcVyaHwbdwM6xDvFxZSLgxacjMSrrogLpw,33537
308
341
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
309
342
  sglang/srt/model_loader/loader.py,sha256=2d9fJNxC3Y6YWmQX4nVOB-b9Glc43ztlkJYJFX1_kxk,46811
310
343
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
@@ -314,7 +347,7 @@ sglang/srt/models/chatglm.py,sha256=n8uZpx9iHw6V-XCns9mtTf99Iqh35ZjPC5bFDYtkoes,
314
347
  sglang/srt/models/commandr.py,sha256=y8DFUW0NKbkoY2DP6nhgJ1f7F_ysjaHEkEnZYZW2zdk,14523
315
348
  sglang/srt/models/dbrx.py,sha256=-L9QkUr_xuMuI6mn0AzG_VE1MqRXoaaFtD4r8UuAzkY,14789
316
349
  sglang/srt/models/deepseek.py,sha256=KfcQ54BqlS73XQmtcG0sfnmm3VXOGwUIkd34WS6Gp0Y,15694
317
- sglang/srt/models/deepseek_v2.py,sha256=1JfWeo0q0N7q9nRuz3pfM4QWTTGu3e1qk2957X1x328,38499
350
+ sglang/srt/models/deepseek_v2.py,sha256=9_284fDT15WEmv8qfnH2EzKX8fxSZrNyiz4iQtgb0tI,39065
318
351
  sglang/srt/models/exaone.py,sha256=Wvr6XofnH2feJ-TzAm5aD1YTyfcum6JdnKMG1S7Xy4g,13035
319
352
  sglang/srt/models/gemma.py,sha256=4Jvt9F-BNhPFiBi5H8aPqcYqKeJLI9KZKy2WpR96RpM,12123
320
353
  sglang/srt/models/gemma2.py,sha256=cyQfby-kp2OZPsUACmBh3-jsXkYwQg9Tj6xqtZ7mTwM,15947
@@ -366,9 +399,9 @@ sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZj
366
399
  sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
367
400
  sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=l1DyU8kC8n_F4Z6Jd8mZKfF23buuLZ5dWuVfyqDWkUI,2968
368
401
  sglang/srt/speculative/build_eagle_tree.py,sha256=zWthboIgzPzSOXcGxDpDv0rBOQP55HYGrBKGqm2gWF0,20732
369
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=5ZCy6ndPA2p95xDgo2kXWD3zCtVaq4q5X0HBpAbB3Xs,7929
370
- sglang/srt/speculative/eagle_utils.py,sha256=BV89f2CTp9H0pSvJfK13WYvTL7LW3BtcplQfLngKihg,24451
371
- sglang/srt/speculative/eagle_worker.py,sha256=4oROLwUBJIwEHNHNEfvsy74DqLQLVc4KfjdR-MrB1OM,12038
402
+ sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=FY4hcwd0Blx7AXbeX6quaXPNgWA8WGIqVcQiEgHyERk,8002
403
+ sglang/srt/speculative/eagle_utils.py,sha256=ypjVmVTVzCGclOVHRMJxdLUSPkf1-7bNXQS0oP6dn5U,25644
404
+ sglang/srt/speculative/eagle_worker.py,sha256=33zC6txEsNp9hD48iy-_67ov83Pf4iASulLg8GHLy5U,12898
372
405
  sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
373
406
  sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
374
407
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
@@ -386,8 +419,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
386
419
  sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
387
420
  sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
388
421
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
389
- sglang-0.4.2.post3.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
390
- sglang-0.4.2.post3.dist-info/METADATA,sha256=eVi6WuPieNGNX7TzNcBd8JolIIPQphw6609pqdALCUQ,23763
391
- sglang-0.4.2.post3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
392
- sglang-0.4.2.post3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
393
- sglang-0.4.2.post3.dist-info/RECORD,,
422
+ sglang-0.4.3.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
423
+ sglang-0.4.3.dist-info/METADATA,sha256=cpA0ecZd4jfaThOrStEpOGbXDTorUxqYdU4catzo2t4,23815
424
+ sglang-0.4.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
425
+ sglang-0.4.3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
426
+ sglang-0.4.3.dist-info/RECORD,,