sglang 0.4.3.post4__py3-none-any.whl → 0.4.4.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. sglang/bench_serving.py +1 -1
  2. sglang/lang/chat_template.py +29 -0
  3. sglang/srt/_custom_ops.py +19 -17
  4. sglang/srt/configs/__init__.py +2 -0
  5. sglang/srt/configs/janus_pro.py +629 -0
  6. sglang/srt/configs/model_config.py +24 -14
  7. sglang/srt/conversation.py +80 -2
  8. sglang/srt/custom_op.py +64 -3
  9. sglang/srt/distributed/device_communicators/custom_all_reduce.py +18 -17
  10. sglang/srt/distributed/parallel_state.py +10 -1
  11. sglang/srt/entrypoints/engine.py +5 -3
  12. sglang/srt/entrypoints/http_server.py +1 -1
  13. sglang/srt/function_call_parser.py +33 -2
  14. sglang/srt/hf_transformers_utils.py +16 -1
  15. sglang/srt/layers/attention/flashinfer_backend.py +1 -1
  16. sglang/srt/layers/attention/flashinfer_mla_backend.py +317 -57
  17. sglang/srt/layers/attention/triton_backend.py +1 -3
  18. sglang/srt/layers/attention/triton_ops/decode_attention.py +6 -6
  19. sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +3 -3
  20. sglang/srt/layers/attention/triton_ops/extend_attention.py +4 -4
  21. sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py +3 -3
  22. sglang/srt/layers/attention/vision.py +43 -62
  23. sglang/srt/layers/dp_attention.py +30 -2
  24. sglang/srt/layers/elementwise.py +411 -0
  25. sglang/srt/layers/linear.py +1 -1
  26. sglang/srt/layers/logits_processor.py +1 -0
  27. sglang/srt/layers/moe/ep_moe/kernels.py +2 -1
  28. sglang/srt/layers/moe/ep_moe/layer.py +25 -9
  29. sglang/srt/layers/moe/fused_moe_triton/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  30. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json +146 -0
  31. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  32. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
  33. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  34. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  35. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +63 -23
  36. sglang/srt/layers/moe/fused_moe_triton/layer.py +16 -4
  37. sglang/srt/layers/moe/router.py +342 -0
  38. sglang/srt/layers/parameter.py +10 -0
  39. sglang/srt/layers/quantization/__init__.py +90 -68
  40. sglang/srt/layers/quantization/blockwise_int8.py +1 -2
  41. sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  42. sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  43. sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  44. sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  45. sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  46. sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  47. sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  48. sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  49. sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  50. sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  51. sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  52. sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  53. sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  54. sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  55. sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  56. sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  57. sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  58. sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  59. sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  60. sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  61. sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  62. sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  63. sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  64. sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  65. sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  66. sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
  67. sglang/srt/layers/quantization/fp8.py +174 -106
  68. sglang/srt/layers/quantization/fp8_kernel.py +210 -38
  69. sglang/srt/layers/quantization/fp8_utils.py +156 -15
  70. sglang/srt/layers/quantization/modelopt_quant.py +5 -1
  71. sglang/srt/layers/quantization/w8a8_fp8.py +128 -0
  72. sglang/srt/layers/quantization/w8a8_int8.py +152 -3
  73. sglang/srt/layers/rotary_embedding.py +5 -3
  74. sglang/srt/layers/sampler.py +29 -35
  75. sglang/srt/layers/vocab_parallel_embedding.py +0 -1
  76. sglang/srt/lora/backend/__init__.py +9 -12
  77. sglang/srt/managers/cache_controller.py +74 -8
  78. sglang/srt/managers/data_parallel_controller.py +1 -1
  79. sglang/srt/managers/image_processor.py +37 -631
  80. sglang/srt/managers/image_processors/base_image_processor.py +219 -0
  81. sglang/srt/managers/image_processors/janus_pro.py +79 -0
  82. sglang/srt/managers/image_processors/llava.py +152 -0
  83. sglang/srt/managers/image_processors/minicpmv.py +86 -0
  84. sglang/srt/managers/image_processors/mlama.py +60 -0
  85. sglang/srt/managers/image_processors/qwen_vl.py +161 -0
  86. sglang/srt/managers/io_struct.py +32 -15
  87. sglang/srt/managers/multi_modality_padding.py +134 -0
  88. sglang/srt/managers/schedule_batch.py +213 -118
  89. sglang/srt/managers/schedule_policy.py +40 -8
  90. sglang/srt/managers/scheduler.py +176 -683
  91. sglang/srt/managers/scheduler_output_processor_mixin.py +614 -0
  92. sglang/srt/managers/tokenizer_manager.py +6 -6
  93. sglang/srt/managers/tp_worker_overlap_thread.py +4 -1
  94. sglang/srt/mem_cache/base_prefix_cache.py +6 -8
  95. sglang/srt/mem_cache/chunk_cache.py +12 -44
  96. sglang/srt/mem_cache/hiradix_cache.py +71 -34
  97. sglang/srt/mem_cache/memory_pool.py +81 -17
  98. sglang/srt/mem_cache/paged_allocator.py +283 -0
  99. sglang/srt/mem_cache/radix_cache.py +117 -36
  100. sglang/srt/model_executor/cuda_graph_runner.py +68 -20
  101. sglang/srt/model_executor/forward_batch_info.py +23 -10
  102. sglang/srt/model_executor/model_runner.py +63 -63
  103. sglang/srt/model_loader/loader.py +2 -1
  104. sglang/srt/model_loader/weight_utils.py +1 -1
  105. sglang/srt/models/deepseek_janus_pro.py +2127 -0
  106. sglang/srt/models/deepseek_nextn.py +23 -3
  107. sglang/srt/models/deepseek_v2.py +200 -191
  108. sglang/srt/models/grok.py +374 -119
  109. sglang/srt/models/minicpmv.py +28 -89
  110. sglang/srt/models/mllama.py +1 -1
  111. sglang/srt/models/qwen2.py +0 -1
  112. sglang/srt/models/qwen2_5_vl.py +25 -50
  113. sglang/srt/models/qwen2_vl.py +33 -49
  114. sglang/srt/openai_api/adapter.py +59 -35
  115. sglang/srt/openai_api/protocol.py +8 -1
  116. sglang/srt/sampling/penaltylib/frequency_penalty.py +0 -1
  117. sglang/srt/sampling/penaltylib/presence_penalty.py +0 -1
  118. sglang/srt/server_args.py +24 -16
  119. sglang/srt/speculative/eagle_worker.py +75 -39
  120. sglang/srt/utils.py +104 -9
  121. sglang/test/runners.py +104 -10
  122. sglang/test/test_block_fp8.py +106 -16
  123. sglang/test/test_custom_ops.py +88 -0
  124. sglang/test/test_utils.py +20 -4
  125. sglang/utils.py +0 -4
  126. sglang/version.py +1 -1
  127. {sglang-0.4.3.post4.dist-info → sglang-0.4.4.post1.dist-info}/METADATA +9 -10
  128. {sglang-0.4.3.post4.dist-info → sglang-0.4.4.post1.dist-info}/RECORD +131 -84
  129. {sglang-0.4.3.post4.dist-info → sglang-0.4.4.post1.dist-info}/WHEEL +1 -1
  130. {sglang-0.4.3.post4.dist-info → sglang-0.4.4.post1.dist-info}/LICENSE +0 -0
  131. {sglang-0.4.3.post4.dist-info → sglang-0.4.4.post1.dist-info}/top_level.txt +0 -0
@@ -3,15 +3,15 @@ sglang/api.py,sha256=rYa2qKE88_RJQwYVvjuJzEZECf75ujchZVqi0q48tqc,6890
3
3
  sglang/bench_offline_throughput.py,sha256=OQb-AjL4UNymmir02ht43uzgaNsnO_I11nXSowKMqBI,13841
4
4
  sglang/bench_one_batch.py,sha256=mVgmg1LP_Y67HlH4M2V7QvuO1aFjCE0n3gDRXW_w_NQ,17935
5
5
  sglang/bench_one_batch_server.py,sha256=iu73SsvYwnuRktYZDz1P6psMiRx8MbEbF5sbsYJdzYg,5962
6
- sglang/bench_serving.py,sha256=DGpC7L7fH3F8h_yOvkhMWwSxPQnNaYInE9gSv13Xeb8,55327
6
+ sglang/bench_serving.py,sha256=ScnPmhg4S5qYJZ0GdwJZf4Eb-j3EVS-gMRQAnFoQqIs,55355
7
7
  sglang/check_env.py,sha256=lDVA3ybt1wOE33HIMpkkU7zGRgLWez1_ifRRJ8qxbtw,8445
8
8
  sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
9
9
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
10
10
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
11
- sglang/utils.py,sha256=hhoF2AILUCu0p7La-7pYAF2C6FeeaXGvErj3SqJTYxg,15404
12
- sglang/version.py,sha256=ZlAh3ELJql74nlCek86guWjIlTdZddRBNjrR9kE8_Dk,28
11
+ sglang/utils.py,sha256=jMMRaAChfWgU_F2BMbDF9G896shiemdzCnWxFocy4QM,15195
12
+ sglang/version.py,sha256=4ZEzfisb7CBE8mZJHsVVkqSmecAalLdtwfQHDTnpWTw,28
13
13
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- sglang/lang/chat_template.py,sha256=0tZX67LgtYGrWopnSuTeqWVdxaw2deJOFWOBJpd6htU,17547
14
+ sglang/lang/chat_template.py,sha256=VKI5RNydfo1-qz6HT-aYm6ecBsa1RjkTZLZLCiBb5Ig,18269
15
15
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
16
16
  sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
17
17
  sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
@@ -24,27 +24,28 @@ sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThc
24
24
  sglang/lang/backend/openai.py,sha256=BQj1FHPXmSfFVQV-SIs7WW6v7tUDUckjtpvs9mhP8Ok,15645
25
25
  sglang/lang/backend/runtime_endpoint.py,sha256=CAVh3X9F80t_2tkJECF__7AdCQtqDg1AHDqIoKIPnvs,16755
26
26
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
27
- sglang/srt/_custom_ops.py,sha256=GUIn2rI5KXAkVdl0LZ-tMSoXDIscQgVRkRl-MD75kdA,5007
27
+ sglang/srt/_custom_ops.py,sha256=0zKNhmLhtUNTBE_WwuzkG_Y_Kaufai84VGe4fJ8v6-o,4975
28
28
  sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
29
- sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
30
- sglang/srt/custom_op.py,sha256=M5oqlgh32vAVeStFCruydTUfi_blGFJihVTnQBEOvwo,1134
31
- sglang/srt/function_call_parser.py,sha256=-siKVUqr3B3pufJ8G0wKGoQKQniAcCa8K7x1kFgQyyQ,19532
32
- sglang/srt/hf_transformers_utils.py,sha256=ymMz_MjaeHirDwzzCWz5ktPEzWdIoP3K9DiZqNtjs6k,7737
29
+ sglang/srt/conversation.py,sha256=qiOAs42rftnIgjPq5c8Ce0DSCMsaRlvFxyUTpUXR_4M,24664
30
+ sglang/srt/custom_op.py,sha256=vhdofFbWtpdtaA4NG4tkanWwEsfvBnjh6OPKOxmxXdU,3426
31
+ sglang/srt/function_call_parser.py,sha256=k_L4Ex4Lc5TUNWiCO6BcSJjlv2TQmvphyGZIxHfl0e0,20835
32
+ sglang/srt/hf_transformers_utils.py,sha256=FPoOvGdEy-lwrOOqHaQnb_tJCGmCW3CpKUjleY7Cwj4,8221
33
33
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
34
34
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
35
35
  sglang/srt/reasoning_parser.py,sha256=45xsU9RCPfyG4_Zx4y3-JPyNgAtrqwKI4j5R2NT4g1s,5594
36
36
  sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
37
- sglang/srt/server_args.py,sha256=61p3vmiMcw3I-g_Xfs2OsMRKroxqzJkGdf_zerRN2js,44682
37
+ sglang/srt/server_args.py,sha256=ZKYb3tL6m8pVNH-3m9yRRRMkz2mlLkRhV0acME7dn0Q,44864
38
38
  sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
39
- sglang/srt/utils.py,sha256=7P7fyymG10E9ImmjUlRDVuJa8zn9C2tfnxULfw1CP30,47673
39
+ sglang/srt/utils.py,sha256=Nh85Igdbmi5nN09s2rFsnnAIUcijk5jljmktiYByZp8,50163
40
40
  sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
41
- sglang/srt/configs/__init__.py,sha256=naCw3LwTLHOCsldy2UyRmxoIWrWfX3hgEP2Gt7frXaw,382
41
+ sglang/srt/configs/__init__.py,sha256=iDfWab2iDMQ3F5bezHjM-fHi7u8iKqk_UBLZFabXxiI,470
42
42
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
43
43
  sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
44
44
  sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
45
45
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
46
+ sglang/srt/configs/janus_pro.py,sha256=v034OiY264vMmGvanHuNL6FzikgqTVwyUoKO26c-PQ0,19015
46
47
  sglang/srt/configs/load_config.py,sha256=NcptQBTG-TBJpfcnE1e32BefBBzrINm9n5bYzTrH1bs,3287
47
- sglang/srt/configs/model_config.py,sha256=DFcYhNcTngoCfGn3lSrIBhOZvuzaFmuSSd0afGn8BXM,18750
48
+ sglang/srt/configs/model_config.py,sha256=3v43xDudZkF1AEOZzYHNsq3FDsEBW2-MWSduggDYOE4,18748
48
49
  sglang/srt/configs/qwen2_5_vl_config.py,sha256=x7erMMDwqlI2l6YYBvZCXlZ53li2waWWgyhJjz273dk,48223
49
50
  sglang/srt/constrained/base_grammar_backend.py,sha256=q2RTH9hv2nKUF_-HVdZzEUjG6LxejPqXjvUOE1NDWaU,6788
50
51
  sglang/srt/constrained/llguidance_backend.py,sha256=ej7wN13SzCsT310C6OIyUg2zs5jeuLl3Ocok9SP9-c4,5702
@@ -53,52 +54,54 @@ sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2
53
54
  sglang/srt/constrained/xgrammar_backend.py,sha256=W7_qyyQiOUwejIPCnWgJrp6ka5fy137SiJtxt3VNruM,6220
54
55
  sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
55
56
  sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
56
- sglang/srt/distributed/parallel_state.py,sha256=ExRNf5pgoUK-UP0oU-PpBgaXoutKZwFs-7djjZSBD0k,49196
57
+ sglang/srt/distributed/parallel_state.py,sha256=lgsscyYrqJLvblF6c5JdfYQrT-YfOKBPCHZnhazlD28,49614
57
58
  sglang/srt/distributed/utils.py,sha256=U-BSaXYjWwnfG8g-tUfBhjKt5Ug097nyHtu3g3aea_Y,8473
58
59
  sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
59
- sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=cFVxk9zMBZIDdvkM6HAkpWxN80iTbF4ycwtuFIJvjAk,22191
60
+ sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=kmbAV_W9ObjI7NH8X0gV__D9iHCuj6QDyhU5ZKhEQD8,22209
60
61
  sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=q2q1A_Sqvrvkrgf7Tjg5XhXR1JWzzUUPHSicAKK2SjE,11022
61
62
  sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=gPjEH1-izoby5uDrfUlzNf21luPT0Ow7pJjhCRKnHy8,1728
62
63
  sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--ZqapHtvm70Lgl7obtE6ZfgeAiU,10064
63
64
  sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
64
65
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
65
66
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
66
- sglang/srt/entrypoints/engine.py,sha256=4BVw8wJIDXtHJu2YM3H1emJIOHO9lt2RXlX8YtsCs-o,19647
67
- sglang/srt/entrypoints/http_server.py,sha256=avgOaHQsxXdwGGbcBE2H225iBUp6Zz21z6hr6y6Z_VU,24724
67
+ sglang/srt/entrypoints/engine.py,sha256=xZmo4F6OuwjOV2CXQwmUAjpoj5tZTV7sJIEEXTFhZkg,19763
68
+ sglang/srt/entrypoints/http_server.py,sha256=lb5sFSBKYiI7NZeFPeFeypjQkhMm5hkYqIwo9AsFDZQ,24724
68
69
  sglang/srt/entrypoints/verl_engine.py,sha256=ICo0F860JvoCy3aKTTLpEHqKcDX4WIN5V85q6873IxA,5798
69
70
  sglang/srt/layers/activation.py,sha256=f097ndEIQ-lQ5JLa4HrcjqLLBeRcZbjYmj2gmVknnkA,5993
70
- sglang/srt/layers/dp_attention.py,sha256=nllN2eCd4KtiOmWdG6dQRlI3CVMi_CBzn7UihlXNLi0,6547
71
+ sglang/srt/layers/dp_attention.py,sha256=gaWG5c6Np2OJls4ldZ5Ea2etpY3T-0uyru7IKBuEtVM,7199
72
+ sglang/srt/layers/elementwise.py,sha256=y2mQqjbF2FmFtNYBk5ecTyaj3ELoZyz-rWPY8rrxCtk,13765
71
73
  sglang/srt/layers/layernorm.py,sha256=DI2Ih34bLh5Ex_yd8-X8L7kNRcYStpc9XpIS4zIoM_U,3860
72
- sglang/srt/layers/linear.py,sha256=tuwG1HAJPZJdzuNnlnYQYyP3ISRgWDY8Nz1fljjcOiw,51111
73
- sglang/srt/layers/logits_processor.py,sha256=BcoVrVYndmP9o0lFXj3YjZ8TOzloHXBAzLwjFZi4u-8,23207
74
- sglang/srt/layers/parameter.py,sha256=sX6aB69qbD6jRqQeOfXqK_ueyyZlXCeC0AlglbsRPcM,14901
74
+ sglang/srt/layers/linear.py,sha256=8HyDjnpUu1ZxdqVRt9jiuO4GvTr2JsO42jwgtl10Qjs,51063
75
+ sglang/srt/layers/logits_processor.py,sha256=RHvuPECoMpdU4Fpv1UrQ3yVz9qHKfbYxLS6k6bNRh0s,23243
76
+ sglang/srt/layers/parameter.py,sha256=-jaiI-dIhiSPigwcQGydBiKEMSVg7XuMcg8Bzb8BHMo,15162
75
77
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
76
78
  sglang/srt/layers/radix_attention.py,sha256=UDL0y4Zasay_Rk-_XmIU4kaGbaF26ONvEHX5EQzLrqI,2260
77
- sglang/srt/layers/rotary_embedding.py,sha256=6-dB-PjdLeY0D5g_5Yx7E8QXtd-MSAfXcFg1ptOp6Hw,43938
78
- sglang/srt/layers/sampler.py,sha256=Cyka1ZvJBtXDl5w1h5pG7bqWDr-w6U0Y53jJKMOdIIM,12034
79
+ sglang/srt/layers/rotary_embedding.py,sha256=3DVmZ9UUYXPiWVxE9Ay25S0g-Oeh-_u7PXvPkRJ8nnA,44032
80
+ sglang/srt/layers/sampler.py,sha256=NIHFR9oot9q-4n414zS3K3oLAHXtuT27mfv-3KgH7Dc,11638
79
81
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
80
- sglang/srt/layers/vocab_parallel_embedding.py,sha256=WFMCGR4EdYloQK5fT1F6BbA7rT5OK76_7pzV81wiJWU,22286
82
+ sglang/srt/layers/vocab_parallel_embedding.py,sha256=QUxd4sELx6p3dHvEKmccPZ-phdd_9EjNdwjH3SJ9zxI,22238
81
83
  sglang/srt/layers/attention/base_attn_backend.py,sha256=j6pDIDuxlWVmc8GzNxcAFi1USRo4gqqI7G65XAQ5u5M,3263
82
84
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
83
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=S8t6u0PliXhtUqnDIG8uYU2h1aIhujkJHyqawxWrYPo,45962
84
- sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=De7VK-2-E5cjuwffOFq0KPjQtDfnJnkFJDdOp-nZbSQ,20003
85
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=iSJ7wSlKWB2GmumG1WtakhPOjQmcI60wz5ZBeiayIHU,45980
86
+ sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=BgbGK5iROulOco7o3vYTaPw1Ei3EaQj6Cn9W57Z4amQ,30360
85
87
  sglang/srt/layers/attention/torch_native_backend.py,sha256=KABmBrMqKa4x08kkQYdIcZUGydvmaVJIUfo3y8jhFHI,9270
86
- sglang/srt/layers/attention/triton_backend.py,sha256=3bt0Cs0B3w7QURtq7AdBgTjcGCAj_Ojod3kjWIdk58k,21542
88
+ sglang/srt/layers/attention/triton_backend.py,sha256=CiDI97Yp1mPOpeObpr2F8z-0OU8EPf4mQ9sQzK-8lJE,21520
87
89
  sglang/srt/layers/attention/utils.py,sha256=gwZoJDyJ9OIPMHWl6r3qkuyVp4Sji9juX7Pwvh9PNxI,1131
88
- sglang/srt/layers/attention/vision.py,sha256=rDKvuA14zC8JQaKJFKZSf0hBbpf3F-ELTzcFk0y3l7o,12939
89
- sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=fmUCxXnnWt9FlomBrveedq5WB-Gzw87wPncDCoHVA2U,17926
90
- sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=ztLWKeW-260EiIw3kCAbtUTUHHxAICz2mVxZJFes4oI,31167
91
- sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=314ubx4GFPmvwxRpsfu-uaTrJ8RKRKK8gdxcdwEMO9s,13244
90
+ sglang/srt/layers/attention/vision.py,sha256=4DjJfK4aUF994R2c_R1HQ6Q7R_CrWIyBT9NGR0yQsvY,12471
91
+ sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=OcN_uL6dwAU3ImCqDHH3ZamJCQSofI8C7erTBc-ZPps,17926
92
+ sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=BXUY8ARHBF2s9x9waiEwfZwcMgvuaJA0gxb4OeUZ_tY,31167
93
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=SZezMpXMr2AEEIvWBQYzx_Gnmr2fyFhQET8eucdmfdU,13244
92
94
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
93
- sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=-68q4M7AL7OYMo2JwVv3yGd29jxITVcX0bUjfKn-xAo,13866
95
+ sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
94
96
  sglang/srt/layers/moe/fused_moe_native.py,sha256=domK1jDc5-zJcdwDso-YgBp_O136VHifnvFVlfFvKsY,4325
97
+ sglang/srt/layers/moe/router.py,sha256=gvyK7hXlujfCZCmAIFc3oxfgjuAjzlpPe3mp1Blc6Y0,10419
95
98
  sglang/srt/layers/moe/topk.py,sha256=MOwBNBwxxW__mgmpX08RIqrh77aXLEgr0F5b5iF7hRI,7512
96
99
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
- sglang/srt/layers/moe/ep_moe/kernels.py,sha256=CCvcVRNE8COLDKglczEni_dIj755p5aStVAiyhuiEfE,15378
98
- sglang/srt/layers/moe/ep_moe/layer.py,sha256=EAOGm9RHwZvf_tiLs7azSoQZ4gFTD0GDyiiUN37chJc,27290
100
+ sglang/srt/layers/moe/ep_moe/kernels.py,sha256=hG7E29bbj8HeXEkAd6N0HzJZPn7cwVx1VJptzpRWOv0,15376
101
+ sglang/srt/layers/moe/ep_moe/layer.py,sha256=y8YaVIvzZ5bomc_9wvc1SwcgOddmVPlD5FYQfxI1kPY,27890
99
102
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
100
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=5Rl7IBzDFjkF4g0hbLvT-FbcutxclGhbskBNjbuIlX8,40426
101
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=MYel2jkrny6StRwE1eWKh3ZTG_p_nH6r4xkZZpuK4IU,23438
103
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=R1XYyB-oS0QMxIoGrMKPBlk17Jq-z1dTzafZ7bcTSwU,41856
104
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=tHSxJ2JWwaSwPuyleILJiMH4cNrjf9P5Y8dcAJ8gm5o,24129
102
105
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
103
106
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
104
107
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -129,14 +132,20 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=MYel2jkrny6StRwE1eWKh3ZTG
129
132
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=FGVvsvw23DeDFLj7TNqm402fAtib2cPGRZR3ePBeUp4,2753
130
133
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=kklgf2qLI5CQYiJJ5e9Gxx2gAfGxcyMDYpdJnIXPV8E,2748
131
134
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
135
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=fRzKfVFIcnxqu6DvGJQNltuFRRGz8F-eaL73bIzBzo8,3255
132
136
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=RuUDK9XfgXs1eZESWQR9ba4tu-rCRG_UCYwjaJ568sI,3264
137
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json",sha256=wlCi9aoYp7Zc1GThEutvWDbse0kKnNaQgFJsd_L8be0,3259
138
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=tPYxeo_xUOkjQrZMdf9v4IaFrw0RGaZNLGLJPOhjE_g,3260
139
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=MW7KzNa7DcKm53u2Jh-mnb93A3ICefgQHkdKONJMfew,3255
133
140
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
134
141
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XmKFaMheq7NNrsvYCJteul0w809l_l460ZiDQC9ToGs,3262
135
142
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
136
143
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
137
144
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=23CJv3de1UDX3EduMpylD9AA8qL5kzMSjLK4GDMqlro,3734
138
145
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=IMTKnPRjhSptf7smIkpqmMjSML9SQ7I8CpkbR3Inzqk,3258
146
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=yJzHX8nNWnKwj9M8281dwwPhOHQ039mBpvNgG7poy3Q,3264
139
147
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
148
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=Y4WpmVGlGERHoeoQNGkQ-GC-MsEtMblqnAVuDbARJdw,3240
140
149
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
141
150
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=uv-RwTNZT2n264dLo4eWxUpB3g7QqUyf2MFEGiRvoqQ,3251
142
151
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=XbCRIOmiNqVKh89p-0UxvvspINRDA1iV83f9l5yORwA,3254
@@ -208,18 +217,20 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=MYel2jkrny6StRwE1eWKh3ZTG
208
217
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
209
218
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
210
219
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
211
- sglang/srt/layers/quantization/__init__.py,sha256=QS3lOGK5pP5e27lLDu3nPoLtrcOlzL17vneKoWVX3Rs,9760
220
+ sglang/srt/layers/quantization/__init__.py,sha256=eBbrWZLYhjtzva61n0VTvs4Q60hb2S8qlHRuiPSFiME,11086
212
221
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
213
- sglang/srt/layers/quantization/blockwise_int8.py,sha256=xLn2dApnxAHo6UFqtDe67CDDN3DxcXmwH5cERFNme_0,14985
214
- sglang/srt/layers/quantization/fp8.py,sha256=KBWTZXz74gqAHAOo0PRv1PT4RCF0ymEFq8u0dKHYfbI,37228
215
- sglang/srt/layers/quantization/fp8_kernel.py,sha256=rbuoOhgpA8_sWE5Tm3C9m0YmLqUSSBKKunLiAnHhh6c,19300
216
- sglang/srt/layers/quantization/fp8_utils.py,sha256=ju4JIYatz3hOv-n5XCAh7V6QvOLFzRbceNuuXaXSwac,5815
222
+ sglang/srt/layers/quantization/blockwise_int8.py,sha256=AhMwTDFkE6NmxO_ER6rq1K3k5Bo5ZZAFk1OmEju7rEo,14908
223
+ sglang/srt/layers/quantization/fp8.py,sha256=pX94MLbzeDuwSgagKux8MInW4oeZ0YEm_j4A6mDq62o,39758
224
+ sglang/srt/layers/quantization/fp8_kernel.py,sha256=BNQC6MlDGw5HnaSBfmn7vajoLEE57J8boAy_2KmJ7DU,23816
225
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=IOhwQv7eDT0-L2H9gGOPx5gu7lGJxUHDubltAnNU5ps,11100
217
226
  sglang/srt/layers/quantization/gptq.py,sha256=u-WadiLnVbeskCU8Ar-1IYofraOf1caYkGrUFFYC27k,14651
218
227
  sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRn_imIw8kNgqdtb2lr7BettjgDgimbl1Rubnamjh8,11352
219
228
  sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
220
- sglang/srt/layers/quantization/modelopt_quant.py,sha256=IfLPea7K99Z92_GX1AvP0_wCiOJWZSqzRTG1EDnat_E,6729
221
- sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
229
+ sglang/srt/layers/quantization/modelopt_quant.py,sha256=F16S-ky8EiqBrTU-u3gCmyc__TVPu03bR1Xo8ojCzRk,6989
230
+ sglang/srt/layers/quantization/w8a8_fp8.py,sha256=AOczkwzh6gRx6nvYFqbrdNyAxmlPsXhws1DEX_nP0Ss,3672
231
+ sglang/srt/layers/quantization/w8a8_int8.py,sha256=V5vxn0wmUL1szj38lsJOKeNNEvFHisU7hZZLO4FfoNc,8733
222
232
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=RdHQxWXwXqvio31192vsLaKjEr4f_DjpMPKlarY1IAk,3251
233
+ "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=0vLaJgo5B9ti-XMFKJuvSoMGjsZQ-RhHSx4cC8Xji-U,3254
223
234
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
224
235
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=CPo1WRF0HgsQMPBkvpoImElQMrfwpJLhEvL86e6fkPU,3247
225
236
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9qdyh6ki9LAyq7VDO9WMRmBOPWKSrZhU-I7z1E9bTKA,550
@@ -227,16 +238,19 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
227
238
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
228
239
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9ya9f1Nt0g0RD-6sRRBZOaIPLSpSFZCz7jNvqTPrgFE,3732
229
240
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=3E-LXaW1FPoTiCPBurm7U2SV4KmWi4xmqwdCjHvZkkA,3250
241
+ "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=aO874ivXpnr7WrFUC7CCN4hxxc_IzOTw2SYNv6L0-2Q,3252
230
242
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
231
243
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=UZljnxxCSjwnZlX3OgKWZJGXCf5BWF_agEpNX8I4Zxc,3248
232
244
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
233
245
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6POXzQZHdNwcBDv1w6BJKbLMRDt0jbFUuMsMNf-ToEs,549
234
246
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=1G2RuKNS88mjD76ZhXFwR-LBhKaqltupGVJQdhsKwJo,3250
247
+ "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=0DXL6DllcvdsF7roLYpE5_7JmBEjcTWiFw2iHjEUURs,3255
235
248
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
236
249
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=3matoCzEe4aexwoe7YTmkjyE4NA8khWXjL5EySuNwzA,3254
237
250
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
238
251
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=JeXNLkbMAjdDKV-WpzQy87SXN06towo3xUofLtvYCQI,551
239
252
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=k81ilt1195nP4r197W7cZonPJ5f2Z5AtSwUZjG2nMOE,3243
253
+ "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=_vkouDwGIPyydiFA2Vx8S9MK5ULFUGs3SwrIPaLhxH8,3252
240
254
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
241
255
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9f8Ib4gLEFSfdNpO8IL8uiONImvqnlPbJrZ0HM3OB-o,3247
242
256
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
@@ -245,12 +259,15 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
245
259
  "sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
246
260
  "sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
247
261
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=fmbzpCd0iyVAjrnUkALmq_RkwuzV_VnaplbS1Mj_csk,3261
262
+ "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=wY8Pt8_57OLIvyELktqNe0G1p5GEwVUacZ3xEebGnLM,3262
248
263
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
249
264
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
250
265
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tG5_iVeRBHTgHX-liOf79nWRjj_lUZ-NQWTbBrBgORQ,3246
266
+ "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=DBCxmR29Qf9f78DPcZN4t_Ybr43TALNZT6F5vAP-56I,3255
251
267
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
252
268
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9tCZxJ0eAD7AYMH7OqS3AGppJUllKnJLNvMq7FMXdsA,552
253
269
  "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=smT1Yg8fVLAzRqQubCCxirWJ9KLbwEqCt9vP_doPv_o,3246
270
+ "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=dTe1O3UiXScsLUSZo4Gij4Q5Ae7IlL3DSnMPiZTY02Y,3250
254
271
  "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
255
272
  "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
256
273
  "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=QMVfMXS0Yjgob8_9xps1xuZi6KnY5l2MeKxXLRjTeg4,548
@@ -258,17 +275,21 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
258
275
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
259
276
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bSxTaptdcgj27mQGmdUmQtYTn4V_8EcmtRaVNigKjLA,3730
260
277
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262
278
+ "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=inXXq68RP540fgLC5HI3gWM5fRZvuCQ_1M3nWdVZB04,3249
261
279
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247
262
280
  "sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257
263
281
  "sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244
282
+ "sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=Jhs4odx25fgXyXSmnvbmNZWypv99ouhdBDJKD3drVBo,3249
264
283
  "sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
265
284
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
266
285
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
267
286
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
268
287
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=7PaawDEmgCGOKnf5l8oVoCEWx9l6APn25ndZ8Yrtar4,3257
288
+ "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=upLDyGsriavtW-C0uTZ2AhctVUDzxv_5XV4rwIyctec,3264
269
289
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266
270
290
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
271
291
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=B0lo3SuoQXhBEnojH2TwpVeurvlKD8yI8kQrJ5ORhWU,3249
292
+ "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=5V84G_NyEFd4o8cL4y34MPWY7UqxL54cQ-79rayc684,3252
272
293
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
273
294
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZYZ03ziPGwG2sCasEYLj7ZIP7vNO8UNBR5qNTmKgRMs,549
274
295
  "sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
@@ -278,38 +299,46 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
278
299
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xxNfGtHAlxDVX7PBnqExJN0UnYlA0UbaYoXUmuX0JsI,3739
279
300
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252
280
301
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254
302
+ "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=bVJlejBgHjRWPDbv-5kbkz1nHjpUL6VKctg3LG1HcVI,3253
281
303
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252
282
304
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
283
305
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XGNbUYyeRnb5NyfpTc1lueHjW_i49O9j9MA-MorasdI,3726
284
306
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=16Qk4BrbVQOdR9Et2T3SnLphQdvEwuuG3W3XCmAFa3s,3734
285
307
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254
286
308
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242
309
+ "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=JcpVETWcbX4uPmaOzswLLlaWqQtOfOkMQxniAqMZZGE,3245
287
310
  "sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251
288
311
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
289
312
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YA4P3iWQcyvx9wRgvs5zOqj3MKb0i3lDTfX3iTzJh2c,3723
290
313
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=m0E9TwfZjvtopYFhI92VSaqhiUZpjBu69kv56rKMEuQ,3729
291
314
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254
315
+ "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=rumUJPNGutObsu4kunFhPa7BogfRiss2qCrQf1_iOm4,3248
292
316
  "sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244
293
317
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
294
318
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
295
319
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=A4uzWJTNhyNVh7ntOvUpT0TheaEVu_js0NCNdav8mTs,3730
296
320
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=uh2HLUNRGIqNulVSJrhamvMo_uExHA-S2okQd6rHB8Y,3247
321
+ "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=SXCT2-3lldJFBzSL4uk60UhGBrzV2zM34A2RkFXgXEs,3250
297
322
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254
298
323
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
299
324
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
325
+ "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=lW8CYKL8FWEYfgoACK6rwLTKxAytO8eHgQPNRHbsI3w,3246
300
326
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
301
327
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FcuzcmKFf2RbaUpAaAsuObUefcGMgNPMDbVdHXRkoGY,549
302
328
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=YKPb9yvuMTBy2mnelxrN0eYcufaMH1ZgNx7_0gGEROI,3259
329
+ "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=aQVljEx0CzvgkWQz8kN89qm6Q-iQJDeK7dKrco9ooFI,3252
303
330
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
304
331
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=h32lCTFilLlyKbMeuJvNWG1v0yJJzNj93kwSvlrHfaY,3249
305
332
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
306
333
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kjQ_kvF38bZGcmaeJGSJsSR0NcUjUOh3LZ2-5c4kPvE,550
307
334
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=DM908j7iJKK0eLDR8R2aBmZC_zmMjWc2LXxLktYtqAc,3254
335
+ "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=t-maxasK-8kv32_gJWOK97QfQrsk3iXW81KGCaLm5HM,3260
308
336
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
309
337
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0jX-z2lTgVw7ABLmWsIsQdqW4EjmbXKRDHye_XPLCAE,3245
310
338
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
311
339
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=mZGU39sobtUqNYKjtyIGjhOZyCOQFJMF3MinA1zjTJA,550
312
340
  "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=UkXzxGGVXuq3ymqaXb1QEqnqXcXBN-mFvZDZoKHH5kE,3258
341
+ "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=tUvbbgl18cgbbmQQe2St-Of4wkhyC9QQOWgjVTBu_dQ,3255
313
342
  "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
314
343
  "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=_exM3wJ3FMmGHweBcH-8IxwZBzaOmPaF3ScMM6KDpiY,3253
315
344
  "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ue2oWml2ouUTZelYx5Nt5pgCmY-ib3mLV1reJL9ZudE,550
@@ -317,15 +346,19 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
317
346
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
318
347
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
319
348
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=2t1lmnh4Fn67YSq4naP2g_RqYC0VtsVgTw5GS14A__w,3258
349
+ "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=syb0tPwrdYUCOoB7fTEXES0GaJ6jy_IqI3i136zudb4,3260
320
350
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
321
351
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
322
352
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ENRWYdUwI0ooHb6IwcHliupRWOPnw-7-WtxZB-qQGJI,3245
353
+ "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=7hTWsESTVJjC8xfGoLF5oZorEpncfrDGh6lXnLZhFIg,3248
323
354
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
324
355
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Nv9KP_KLGsRJdJF755dZBvbTws37u1GM2UigMRlAtl0,552
325
356
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=v2VwGLYvrYy3Nfr7CiOksjaR-XbwHu21RsXZ3J6_yfI,3258
357
+ "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=9vTtCCtEf7yhMsng-ZEAU_gvKMTq04wTgeykV0BgnTo,3262
326
358
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
327
359
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
328
360
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TdWuE2RIsIyr4Im24MuWK3XyiNtbhO_hAiAXDz5gNUk,3246
361
+ "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=nD-Ki3WuQ-MktvFHasYk8iC1syPUmfbpsocXUea9Bdw,3247
329
362
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
330
363
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZOWEo__oUy8AhJiAlRCuGNAZNdNweFdWBFptJYkwxs8,552
331
364
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
@@ -333,17 +366,20 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
333
366
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sTvaJ0RiCaQem4F1z7oES6RVRJ2gKgBuccX13S1SqGc,3733
334
367
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256
335
368
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254
369
+ "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=r-zndmVTFE-b1jnON9zwuBQvTBooVuj1qjf7hvjFEt8,3246
336
370
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253
337
371
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
338
372
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=zuSYO0DejuHJK0dqSszTySoZUFizgjtLIXSjjOC_lpc,3726
339
373
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Rq-eMMK1deUJzmHQOH0E_pwQP7l-ZU-ECTP7Xwegavw,3736
340
374
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264
341
375
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
376
+ "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=ZVFmEA61hieyp5xAyLTkzflAmWcuNtXYiAjlfAwMx7g,3253
342
377
  "sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
343
378
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
344
379
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=aoq4792zPo87QO7VrEf9fb_vj0zPiHIu7Ho9aMXwcLw,3731
345
380
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=_RHvFcbtpsZBKxZte2-E3SUHtL1pwRtqwhSV4BMcyKo,3734
346
381
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
382
+ "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=FImA-TJ_tQDjqwoNWxS--sRDoKDXf9gamlME3tkxH58,3252
347
383
  "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
348
384
  sglang/srt/lora/layers.py,sha256=r34oprzwyE3SWPvaNkBvXWPtfa-0IY987_bjj36ySfw,9996
349
385
  sglang/srt/lora/lora.py,sha256=07-IaAfbb3zPJ-DukHL3uyQ8fjJx_hrVFHS2pqe8LZg,8238
@@ -351,7 +387,7 @@ sglang/srt/lora/lora_config.py,sha256=qDgMTx_69jyJUl29O5FxLzYa0BMhqYVXWXfyyVOvGm
351
387
  sglang/srt/lora/lora_manager.py,sha256=Wlq5dxLM7Uj4uTGpFXH1q-IOI8j4mFXYHPKSltx2QMI,7794
352
388
  sglang/srt/lora/mem_pool.py,sha256=eV_GXETxNODPVIAnTEeUUUVn0IVgguBR_mYFzIK-VHA,6835
353
389
  sglang/srt/lora/utils.py,sha256=6i7Q1Y-1LLbRkeCMv_lKIzkTN0veUTLbc8wlHn7R-bA,4571
354
- sglang/srt/lora/backend/__init__.py,sha256=98L_KRRnE3gcGcx7Lb6yjAEUUE_Yay3QszcQXdzYsDw,708
390
+ sglang/srt/lora/backend/__init__.py,sha256=FziFT8HguMFj-h0tUCc4_UEbtOWMlYi4gNlYJcArWh4,671
355
391
  sglang/srt/lora/backend/base_backend.py,sha256=dldwA7vTWrB1ln1MwLYKNtMkBoAgD7OLSlWe9tL2lzk,4602
356
392
  sglang/srt/lora/backend/flashinfer_backend.py,sha256=fXfkl7Cpw8ap2bCrgWdn_gEUzMXX1pNjNuiPw3kA76U,3984
357
393
  sglang/srt/lora/backend/triton_backend.py,sha256=ZT5M30vj8x77Kltukpga4wk1sd8fT4n_FdsOMQBTMI0,2610
@@ -360,42 +396,52 @@ sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=qve4oNZHYUFk9ckmT2BVuDNMEvrN
360
396
  sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=BmIcTZMnlSnie9rnMl4KvLpc4Njsk7_IppbUqitf9Xw,5738
361
397
  sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=kv-AvJ_Bi3yWjGvFnSwXvP66iJvY9n9pEnJzJ9-DWzo,3982
362
398
  sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=Ai5vPriT4OgACwK7xrpGgf5L1oaN9x0jwNKMChu3uI0,4299
363
- sglang/srt/managers/cache_controller.py,sha256=8idtERyZayP5rJZBcdBSnoJaB7FmeDdhgNydwetxa5E,15588
399
+ sglang/srt/managers/cache_controller.py,sha256=XB6UHFXVw5LAyKV2jK0iVzhAYT4fSzJVCNL42eKGUsk,18175
364
400
  sglang/srt/managers/configure_logging.py,sha256=fOJaXAQ1n9m-8KPJndpsKvS885i69SMafoEADLIVfIM,1633
365
- sglang/srt/managers/data_parallel_controller.py,sha256=K6jwHn_UhsC7o_lZT5FQm9oKBbsYhlHKiDSutFlm3jA,9539
401
+ sglang/srt/managers/data_parallel_controller.py,sha256=px1SrFUUDDroXaHN4m9LcMNqGDt2vv9COiVNg1O-BqI,9561
366
402
  sglang/srt/managers/detokenizer_manager.py,sha256=HTfpJWMF1EImhKOnLJ96xPmYXm71xzaisLMfxg3zpgs,10111
367
- sglang/srt/managers/image_processor.py,sha256=NQnhbV8AYr9H6wTKulxcBd6cauBlkySNSDQzk6nKs74,23906
368
- sglang/srt/managers/io_struct.py,sha256=ltod5DPNQBJalrz420mcSZReSV1EmYQF9Edh_FiDMf4,22047
369
- sglang/srt/managers/schedule_batch.py,sha256=oYrAtlDUjiekQ8xTSnR15i19SJcNyfbiy61PBJpbMi0,52241
370
- sglang/srt/managers/schedule_policy.py,sha256=CTN9bggZ8_EiJJYJwEE6rSlfSrrVjka-hadXcQn10HM,18321
371
- sglang/srt/managers/scheduler.py,sha256=Kd80MjJEKxSfsETlacUtOZmZtF5iUI-ED3d8G8QcvHo,92144
403
+ sglang/srt/managers/image_processor.py,sha256=o2ChZW3mWSw1I_4HFE8SV1ZsYtWp9G9w7Ij73BvugN4,1711
404
+ sglang/srt/managers/io_struct.py,sha256=XvXM6WRarwEtaNQWlzZpYKy2Zi4wCG3xnz_ojskYJjM,22791
405
+ sglang/srt/managers/multi_modality_padding.py,sha256=dkFKknxML8R3KrAZhKXO-A69u3pUUI1pRfOVMJfqHYw,5165
406
+ sglang/srt/managers/schedule_batch.py,sha256=NzT2ivSSKRqtFRiAr-d2MO6spH0z-cf0jANw3JyGrr8,55983
407
+ sglang/srt/managers/schedule_policy.py,sha256=E1qVq2G3jptKdX9nlqfayeRBUll9xB6bK8nBf3EW32E,19469
408
+ sglang/srt/managers/scheduler.py,sha256=bL1HiKey5ZgyqcYFur6pfEZLbBPcKLF-peAreiNnfK8,69433
409
+ sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=u2sj6MViFTov0lVZSysZ-wph2pEqRCtCjwA1UdttZ7I,26338
372
410
  sglang/srt/managers/session_controller.py,sha256=YOv8cFcuVmdCE4OfQJ6aA5AosHRwtZF9WdUUfUXEp0I,5753
373
- sglang/srt/managers/tokenizer_manager.py,sha256=Wg_W42A04WBVTDUpRYpvIW2-RK-AiMwZ9-ytCN_2PjM,44750
411
+ sglang/srt/managers/tokenizer_manager.py,sha256=_xIn-3BDgagqnmRKFhIQTkCg2jXk6jp607B7CeprNVw,44686
374
412
  sglang/srt/managers/tp_worker.py,sha256=o9MY1a8x81nI3W0m64YvOXcAA4sCBvPPmnNU67vBXGs,8710
375
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=bFt8L6H1Tz3QWYdVx9Hl9vUbxY8xmAiaMEMFzCwd7Nk,9022
413
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=BhbVjI4DDXBIbRs1xzxME5uHy6WAxmZSJBepRu0Hv-0,9108
376
414
  sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
377
- sglang/srt/mem_cache/base_prefix_cache.py,sha256=qxgpSHm3qtMdab4U35Mr2BE9TQNjElrnrNMTwL_Osdo,1049
378
- sglang/srt/mem_cache/chunk_cache.py,sha256=gsXvfCkMRLHIvInFopoSGoDxCDd_um7VDPZaWpUF0v4,2696
415
+ sglang/srt/managers/image_processors/base_image_processor.py,sha256=6UdYo3lFgrMsn-5a8NDKhkjeJ9bxPDFRVzhXv_wV5fg,7567
416
+ sglang/srt/managers/image_processors/janus_pro.py,sha256=2Qmt2gKPT4-pUnTBhEEokiBEQ0kEjdZjl0Eq1U6y42U,2615
417
+ sglang/srt/managers/image_processors/llava.py,sha256=UdNfmAZuw__MgySC4uu5NsWgBXPyP4V4lIzDEFV9AHs,5839
418
+ sglang/srt/managers/image_processors/minicpmv.py,sha256=JS11rO_gyRVLijD2AxNM9ZJZOFxLbver_v3bqqUEhBg,2881
419
+ sglang/srt/managers/image_processors/mlama.py,sha256=ECN2rmS2Vl9eg3u-XmwtphQm7zTaB4clK2HU5ntuODE,2186
420
+ sglang/srt/managers/image_processors/qwen_vl.py,sha256=wJbbCW1DKPQp_RufQ4fz7Nd_hRldfwe97e_2YMUGmJI,6417
421
+ sglang/srt/mem_cache/base_prefix_cache.py,sha256=NY62Zo0A0tLJ7ObRLOQqQcXCxoJUDZsK8f5U4dNQjKc,973
422
+ sglang/srt/mem_cache/chunk_cache.py,sha256=it5SfL1FwMbrdeOH-I-Eu_i-I9hFB1xL-z_brIUoCkk,1835
379
423
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
380
- sglang/srt/mem_cache/hiradix_cache.py,sha256=x8cX2d88QjbEjqyOF3vT67lyFSdLem8AYAxej9NF5Jw,14256
381
- sglang/srt/mem_cache/memory_pool.py,sha256=ovqpob1luqfzg3qckANwUnEcpLTG-DJcULIZQyeFlVQ,20687
382
- sglang/srt/mem_cache/radix_cache.py,sha256=LbdorXpJ42mmNv9y_Mll0v_c5hgy0LraDCn8vK78QnA,12161
424
+ sglang/srt/mem_cache/hiradix_cache.py,sha256=UTdIqGosuU3PjGj27hmLuf2exPd9Rkg8GShVqi85FUA,15548
425
+ sglang/srt/mem_cache/memory_pool.py,sha256=xZc9pXgCwQyfv7rkgh4uoXDi2qPwueqTuw48poSV4OQ,22816
426
+ sglang/srt/mem_cache/paged_allocator.py,sha256=DRq7qZXLVeHKkCWRlEkoAmLP5IC_1ReHa_xL7-8Orzs,8594
427
+ sglang/srt/mem_cache/radix_cache.py,sha256=bLldgkFZvmFOVgc2xLI87an3UBYxIsOlRBsxenFEL7s,14912
383
428
  sglang/srt/metrics/collector.py,sha256=0X40ZZ18182sx2t0eqeqoK7gspH36L343zNvSkgBvd0,9293
384
429
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
385
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=l4coS7de4zwYbxuBokZ3MyLZUx9ygGW1BqqqMCJGNtQ,20010
386
- sglang/srt/model_executor/forward_batch_info.py,sha256=8x3y5rCMotL8iSoSG3YMd77bI6mZOoisbHIbJcBZT04,16809
387
- sglang/srt/model_executor/model_runner.py,sha256=b1NEnYaKbg_w6fu2cjdX_YdMjkpwRgBgfMf86T1JJ7w,41727
430
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=EllAbxzJy6ULgJyboVYmqG1IFYhNM6zLAAmpWARsIRo,22197
431
+ sglang/srt/model_executor/forward_batch_info.py,sha256=wtByWmk7u4YUH2ZergdjT1tyl5Jhc75S1cVvd_GTEQI,17404
432
+ sglang/srt/model_executor/model_runner.py,sha256=zE3nWMBLI0eW2mDZHBvxj1bHW3VsAxtd-RP6uddyP5A,41581
388
433
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
389
- sglang/srt/model_loader/loader.py,sha256=zDmZ7NZ9Z44EDR2H8d3bKadZD4Ey7Jmju8wBnWhM1FI,46805
434
+ sglang/srt/model_loader/loader.py,sha256=bHsb3T9CGac32NgDeRt03cJXBy-t1WsyYOWcYkeMVLA,46813
390
435
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
391
- sglang/srt/model_loader/weight_utils.py,sha256=dWEnDimMFNHbre_QMB9sG8m_L7_t1NAZk4s-vkdpSHQ,30354
436
+ sglang/srt/model_loader/weight_utils.py,sha256=26fCuVipwEiDjziwsuavwa_BC1zPB2-socfly6IjmG0,30373
392
437
  sglang/srt/models/baichuan.py,sha256=iXgta-W38OWesxmXWZJ73fUvPdu51EwTQzUD5mmfJ8s,15721
393
438
  sglang/srt/models/chatglm.py,sha256=avLC7mjjGskBxCxy-9s0sMlAJjfFoG_y8VieR1QfDsM,13918
394
439
  sglang/srt/models/commandr.py,sha256=Ug-B0QcdWZufrTybC6K5yP3MLKNsYb-vzfrqUsXYUcI,15276
395
440
  sglang/srt/models/dbrx.py,sha256=0Vf4yhqe8YeQuKR3P-agvYJScmHwH3-tFbyU8kv5QJM,15559
396
441
  sglang/srt/models/deepseek.py,sha256=Le2MXij8m4hT7QYgD0bFMFmYhbgX7SMjoXZFB8BxgyA,16871
397
- sglang/srt/models/deepseek_nextn.py,sha256=F8GCAsgq8EfW5ykx9pvQzyHPQ9q8xK5WPd8WtPV-6PU,12141
398
- sglang/srt/models/deepseek_v2.py,sha256=TwSz-UbxqHpIDxNmipDQTUOoOpxvHhM0IYSmkyRAERY,48544
442
+ sglang/srt/models/deepseek_janus_pro.py,sha256=sduOYeAQrb2DFfoeJVzxMNOAgS7_CmTqaKWc8J3Ypeg,72528
443
+ sglang/srt/models/deepseek_nextn.py,sha256=8uR4k1qCBKL1JAcE8cCJz3HowrIgCgdMtbqQ4Xq8Tr4,13161
444
+ sglang/srt/models/deepseek_v2.py,sha256=oEREP6cErJxBuQRooXY6tZ7oMwLvWLTjDYZPAfXVAS8,48651
399
445
  sglang/srt/models/exaone.py,sha256=5iibqQTjpgosuGRt2rj2lWR0ShK2XGhbdFSnOWpaQss,13386
400
446
  sglang/srt/models/gemma.py,sha256=3XxMDOKz4xMP6VzWoW8f0hmMf8LP8fhzMw5prsYC4e8,12602
401
447
  sglang/srt/models/gemma2.py,sha256=MDe_HNkSpEJpw426tbx3fp271GBlSVEuhIdGeOB_jYA,16356
@@ -403,7 +449,7 @@ sglang/srt/models/gemma2_reward.py,sha256=V8U3_ADUHWPdOwvEe1jhGW-oJmBgL8t1TY3-67
403
449
  sglang/srt/models/gpt2.py,sha256=dAnfmsAL7JVHakryqrERR1jgL8mI1Op6nPHYfDCF7Ao,9802
404
450
  sglang/srt/models/gpt_bigcode.py,sha256=EAN6xAXpa8m3DcBuH1D4rTPji2oG9NSozGXSNHtE2lw,10268
405
451
  sglang/srt/models/granite.py,sha256=nu_Zl_PYn188gk1uYVZ76y4wwHZV7G0w7uanhqpSFUs,20813
406
- sglang/srt/models/grok.py,sha256=LYi-1VpGiB4SvFBc3Scp7vQTjiCODa6J_bFMjSOdsCQ,18768
452
+ sglang/srt/models/grok.py,sha256=pQOXtpHOYVntwt5QQRLffYsnMHmMfPMmGyKMfR0k0Ic,27994
407
453
  sglang/srt/models/internlm2.py,sha256=4eh9WVgK4yg13IsnH5qB2xUCWnixj_aLLz7qa_4m2_Q,13017
408
454
  sglang/srt/models/internlm2_reward.py,sha256=ndfGmyqYZbVZ7C7rJ-v9oK3wa-EpoBGybS8MlyKZi2E,2522
409
455
  sglang/srt/models/llama.py,sha256=FEtMrzjrbQLOOQvrqGFwslizjHPaU--9rEiXlr7MIiI,22958
@@ -415,47 +461,47 @@ sglang/srt/models/llava.py,sha256=J06XzASrhg2Pw1Z_jMoC1lXI4hFfoZacqS2mhiLI41k,26
415
461
  sglang/srt/models/llavavid.py,sha256=iwqwTruJTG9D1zV95RHq-RpTp1g12kKPrNIfv2a2XyI,12485
416
462
  sglang/srt/models/minicpm.py,sha256=-ot45U_Bv4x85JdbIAQXoxa1sF-ZDkBk8flU-Ruli5Y,14652
417
463
  sglang/srt/models/minicpm3.py,sha256=sRHPFUH636GIY94B-hpAN2MSzYT1pzLPVypTNjUtttY,26270
418
- sglang/srt/models/minicpmv.py,sha256=LH3IHnPJJMxPOb3rZuDReE_21aUPA56EvqFUguRcyGY,45948
464
+ sglang/srt/models/minicpmv.py,sha256=AYxGTFbvfhIgmGmM7ab6620jtCS2qabh5tJ6NrkQ_0U,43839
419
465
  sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
420
466
  sglang/srt/models/mixtral.py,sha256=6Fse2J-20IMylP-yzpEihIinaH37TmmslATbLcWBRYY,14926
421
467
  sglang/srt/models/mixtral_quant.py,sha256=MSa6UKPbgv8Rn8Iv8o1dQhcstAHLNQzE0eepFx_hYSw,15221
422
- sglang/srt/models/mllama.py,sha256=SlNDNKAlF42dtkS-JgkLNE8k70YGcGuIeg24FvTs-Us,37886
468
+ sglang/srt/models/mllama.py,sha256=jG29hG_O6YzzakDP48hITQdMiNKX6fapkQ0duiyAD90,37887
423
469
  sglang/srt/models/olmo.py,sha256=FJk8A3T3TF5QcTV6rMP8np94QtvxpMWlgCsv_5VwpVE,12632
424
470
  sglang/srt/models/olmo2.py,sha256=U0ScFzWazOrb_Q90sfXkpVNAsXT-pgZbNgGh80R40VE,14288
425
471
  sglang/srt/models/olmoe.py,sha256=tx5OKWLOr6_pohe2eBcIodCmcuSjtpteHq_tG_QVYCY,15910
426
472
  sglang/srt/models/phi3_small.py,sha256=_ZDXVJN3B5f-46MMKxNim9874cVqJpzBipbmfBz6Hn4,15464
427
473
  sglang/srt/models/qwen.py,sha256=edS0UYq6AoHZdYUJtQa5wyFNzZMW0JAMmBulH2uheaw,10719
428
- sglang/srt/models/qwen2.py,sha256=KkMd6fxF2dn77RfrH_Ayc4w0uZmOLtwPlsVzDGIMW8g,15980
429
- sglang/srt/models/qwen2_5_vl.py,sha256=QQFFPV4t35grzjVdB13SCPxxLiJDXLQ_tBk62DRljq4,29113
474
+ sglang/srt/models/qwen2.py,sha256=_jo9Ox7MrXCicw7Lhjla29UtRSXfsNd079So81rNcLM,15947
475
+ sglang/srt/models/qwen2_5_vl.py,sha256=oA6Cz3-3RQN-D9B88s8dybV8idGjpdu2Fcm60i77Uqs,28184
430
476
  sglang/srt/models/qwen2_eagle.py,sha256=Iz0HWL2FgSD3FqoFhfYmbIZeEYkPTJ96lYbkncmHJX4,4644
431
477
  sglang/srt/models/qwen2_moe.py,sha256=zYLJecN1mUyMBmnZoVaJd8LUKT4YZPBIO1lfFOqmU-c,17755
432
478
  sglang/srt/models/qwen2_rm.py,sha256=-mQXDEv11p-I1HXgYLTtY6ROem6UYorO958WsDrzsgs,2837
433
- sglang/srt/models/qwen2_vl.py,sha256=7xe2aS7V3_0z85ohegpz7DQLU1JN3LgvLpG_miyb-Ms,24520
479
+ sglang/srt/models/qwen2_vl.py,sha256=G1Hx3n-5pXRtVHEtS-UjiQTr4AvltXwQ4PyroxNGfcE,23918
434
480
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
435
481
  sglang/srt/models/stablelm.py,sha256=w93fNXpDwQbuKi4tdeo0bsXFZrMZVY4_pgNL0E5RErQ,12242
436
482
  sglang/srt/models/torch_native_llama.py,sha256=5tfFSMAXB3ScToqTALtCXa8Oo-qPCJh-KQCNB6QOlNA,19293
437
483
  sglang/srt/models/xverse.py,sha256=I7ivNsk6NRqPxlMUmdclpzDCvhAnWbv_GOj01MKHJrQ,13996
438
484
  sglang/srt/models/xverse_moe.py,sha256=xLwn5pRwQrvj7zMmwl3o49m7xILb2ACRdWvm9hY8LDc,16743
439
485
  sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
440
- sglang/srt/openai_api/adapter.py,sha256=FqYLICnYL53UwJT2OJPPJrMJxK-p372-cFl0TmZOQBE,66142
441
- sglang/srt/openai_api/protocol.py,sha256=KLh9u9On595L7EDXmg76X8mhCw4yLDUjY7zvdjlwPWM,12336
486
+ sglang/srt/openai_api/adapter.py,sha256=GAAo_Vuf9aiKgUH0NPanDTB_eC1WqRRNnltVvAFeW4s,67412
487
+ sglang/srt/openai_api/protocol.py,sha256=6-vwYewe7JfmMMC56oRU13aRmmKUerZs50GPmiGeX7w,12490
442
488
  sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
443
489
  sglang/srt/sampling/sampling_batch_info.py,sha256=T4UJ_CRB6A0HnRwEsqmxy1CJQMwZaVdxbdlCeTK4BUI,11992
444
490
  sglang/srt/sampling/sampling_params.py,sha256=HihGfhdR4FmOeltEqpW2kSLfNu94VCd8l0RNOQFSl-Q,5919
445
491
  sglang/srt/sampling/penaltylib/__init__.py,sha256=mtN8grFEcaBUhl4yBHmw8NNirt_i6uKO2cDNLHOpZQE,496
446
- sglang/srt/sampling/penaltylib/frequency_penalty.py,sha256=wdWLmhYnJkuS5qjFGbSLsWBvmYyKR77HIKFl5T_aavY,2232
492
+ sglang/srt/sampling/penaltylib/frequency_penalty.py,sha256=Loc3qjJTksNc5s-DV7QZHjgqoo5pxk7-nZzxwyhD2tQ,2144
447
493
  sglang/srt/sampling/penaltylib/min_new_tokens.py,sha256=rdU_D7RoIcrQPhysNQEzmr4TO2OoEi___p-i3QdwkgU,3331
448
494
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=XM-Lm6u7gYPtMZrTIc0FR4QxNZxBH5s_Cj82umyCzYk,5721
449
- sglang/srt/sampling/penaltylib/presence_penalty.py,sha256=ZU18IAOSd1qNcZdsP47TS-gjM-jVq9s8YuQWhUZ7xZI,2205
495
+ sglang/srt/sampling/penaltylib/presence_penalty.py,sha256=NRh10AJrrQlGJ6S-enGdRefrTrWpyqrSm-aNnyqQNQQ,2119
450
496
  sglang/srt/speculative/build_eagle_tree.py,sha256=8_uUpkQAE0qcn5mA6NPjfl81EMuNxg4fZq628wjEZNU,20805
451
497
  sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=zsF4AcsykaFjzZ5SKdvUQyEB1GVXTvR1_kI_oaQafBo,7704
452
498
  sglang/srt/speculative/eagle_utils.py,sha256=DhpNNNN56jw0ucXSBYq9_IzDuJmEKgXS6lbLczaghNs,27196
453
- sglang/srt/speculative/eagle_worker.py,sha256=05bUiqQdO6B9dwfSW1BOEtez6HX24OHwJfm6bFPJa7I,18807
499
+ sglang/srt/speculative/eagle_worker.py,sha256=ewqd3NxP0q8CHIhsigHYATv1SUnVl6SU7sUo5aLnFhE,20437
454
500
  sglang/srt/speculative/spec_info.py,sha256=hJR0b3gZ0QA0KZLq6TfqSDJkmpV6mThyle3sHYI4o4M,522
455
501
  sglang/test/few_shot_gsm8k.py,sha256=7VLbWl4nCQs1wjtW4q-46jf9jUCycSs5Iw8v7sUSzBw,4284
456
502
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
457
503
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
458
- sglang/test/runners.py,sha256=HjMMlQtyYSzVYE9vHYaaWW5KEEJg6D5nklQIgvAsuFc,25189
504
+ sglang/test/runners.py,sha256=4ZH0AtzMNFzxLXCxJlKpQOLl5C3jkut9YkDAvGYxW5w,29191
459
505
  sglang/test/send_one.py,sha256=6FhbJ3c8RpXxvFTELRXaF97GpT7zXXsCDYZh1DqG22E,2550
460
506
  sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
461
507
  sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
@@ -464,13 +510,14 @@ sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWU
464
510
  sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
465
511
  sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
466
512
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
467
- sglang/test/test_block_fp8.py,sha256=rhrIun8aW5zq2qvuGRlo7F7aZ_upjVxtQMVlyc2Th_E,11771
513
+ sglang/test/test_block_fp8.py,sha256=bHYgQijsx0D4q0CgoZv6jOQLAWUz6QQC1cgT2oLLOnE,14420
468
514
  sglang/test/test_block_fp8_ep.py,sha256=hkuQjmCv3y_hWZj21cT9EaB6KSfT3JSzYPRQNFaLP-Q,10759
515
+ sglang/test/test_custom_ops.py,sha256=XBTWh3jEsoe9hZ93p3LAUtjEj5l0qNEaZM5Mto6pbJA,3262
469
516
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
470
517
  sglang/test/test_programs.py,sha256=VZ3vXtUDBnXz0M7gFdDH8hXg9Wa0j_qI8CVqjEgRN_E,18877
471
- sglang/test/test_utils.py,sha256=jVkIdnzkQ5ZGynWkfFMd6GLonJwq_2a6iZAvPLUFlZo,28549
472
- sglang-0.4.3.post4.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
473
- sglang-0.4.3.post4.dist-info/METADATA,sha256=I3MCfxnWrPkIIQdq1rdqEqm0_1QkyR_QmVZ-jcIbHZU,24409
474
- sglang-0.4.3.post4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
475
- sglang-0.4.3.post4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
476
- sglang-0.4.3.post4.dist-info/RECORD,,
518
+ sglang/test/test_utils.py,sha256=UBBk-F4A5hEtmRdu7vEttlnSmPVPJLQgtNZxMn1Tdo4,29188
519
+ sglang-0.4.4.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
520
+ sglang-0.4.4.post1.dist-info/METADATA,sha256=P0EmlGnVHDnvAojlrFQw_2CrPN2qERG6VIUoR6kG4vo,24311
521
+ sglang-0.4.4.post1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
522
+ sglang-0.4.4.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
523
+ sglang-0.4.4.post1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5