sglang 0.4.5__py3-none-any.whl → 0.4.5.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. sglang/bench_one_batch.py +21 -0
  2. sglang/bench_serving.py +10 -4
  3. sglang/srt/configs/model_config.py +37 -5
  4. sglang/srt/constrained/base_grammar_backend.py +26 -5
  5. sglang/srt/constrained/llguidance_backend.py +1 -0
  6. sglang/srt/constrained/outlines_backend.py +1 -0
  7. sglang/srt/constrained/reasoner_grammar_backend.py +101 -0
  8. sglang/srt/constrained/xgrammar_backend.py +1 -0
  9. sglang/srt/disaggregation/base/__init__.py +8 -0
  10. sglang/srt/disaggregation/base/conn.py +113 -0
  11. sglang/srt/disaggregation/decode.py +18 -5
  12. sglang/srt/disaggregation/mini_lb.py +53 -122
  13. sglang/srt/disaggregation/mooncake/__init__.py +6 -0
  14. sglang/srt/disaggregation/mooncake/conn.py +615 -0
  15. sglang/srt/disaggregation/mooncake/transfer_engine.py +108 -0
  16. sglang/srt/disaggregation/prefill.py +43 -19
  17. sglang/srt/disaggregation/utils.py +31 -0
  18. sglang/srt/entrypoints/EngineBase.py +53 -0
  19. sglang/srt/entrypoints/engine.py +36 -8
  20. sglang/srt/entrypoints/http_server.py +37 -8
  21. sglang/srt/entrypoints/http_server_engine.py +142 -0
  22. sglang/srt/entrypoints/verl_engine.py +37 -10
  23. sglang/srt/hf_transformers_utils.py +4 -0
  24. sglang/srt/layers/attention/flashattention_backend.py +330 -200
  25. sglang/srt/layers/attention/flashinfer_backend.py +13 -7
  26. sglang/srt/layers/attention/vision.py +1 -1
  27. sglang/srt/layers/dp_attention.py +2 -4
  28. sglang/srt/layers/elementwise.py +15 -2
  29. sglang/srt/layers/linear.py +1 -0
  30. sglang/srt/layers/moe/ep_moe/token_dispatcher.py +145 -118
  31. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  32. sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
  33. sglang/srt/layers/moe/fused_moe_triton/configs/{E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json } +34 -34
  34. sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
  35. sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  36. sglang/srt/layers/moe/fused_moe_triton/configs/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  37. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +38 -21
  38. sglang/srt/layers/moe/router.py +7 -1
  39. sglang/srt/layers/moe/topk.py +37 -16
  40. sglang/srt/layers/quantization/__init__.py +12 -5
  41. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +4 -0
  42. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +68 -45
  43. sglang/srt/layers/quantization/fp8.py +25 -13
  44. sglang/srt/layers/quantization/fp8_kernel.py +130 -4
  45. sglang/srt/layers/quantization/fp8_utils.py +34 -6
  46. sglang/srt/layers/quantization/kv_cache.py +43 -52
  47. sglang/srt/layers/quantization/modelopt_quant.py +271 -4
  48. sglang/srt/layers/quantization/w8a8_fp8.py +154 -4
  49. sglang/srt/layers/quantization/w8a8_int8.py +1 -0
  50. sglang/srt/layers/radix_attention.py +13 -1
  51. sglang/srt/layers/rotary_embedding.py +12 -1
  52. sglang/srt/managers/io_struct.py +254 -97
  53. sglang/srt/managers/mm_utils.py +3 -2
  54. sglang/srt/managers/multimodal_processors/base_processor.py +114 -77
  55. sglang/srt/managers/multimodal_processors/janus_pro.py +3 -1
  56. sglang/srt/managers/multimodal_processors/mllama4.py +21 -36
  57. sglang/srt/managers/schedule_batch.py +62 -21
  58. sglang/srt/managers/scheduler.py +71 -14
  59. sglang/srt/managers/tokenizer_manager.py +17 -3
  60. sglang/srt/managers/tp_worker.py +1 -0
  61. sglang/srt/mem_cache/memory_pool.py +14 -1
  62. sglang/srt/metrics/collector.py +9 -0
  63. sglang/srt/model_executor/cuda_graph_runner.py +7 -4
  64. sglang/srt/model_executor/forward_batch_info.py +234 -15
  65. sglang/srt/model_executor/model_runner.py +48 -9
  66. sglang/srt/model_loader/loader.py +31 -4
  67. sglang/srt/model_loader/weight_utils.py +4 -2
  68. sglang/srt/models/baichuan.py +2 -0
  69. sglang/srt/models/chatglm.py +1 -0
  70. sglang/srt/models/commandr.py +1 -0
  71. sglang/srt/models/dbrx.py +1 -0
  72. sglang/srt/models/deepseek.py +1 -0
  73. sglang/srt/models/deepseek_v2.py +248 -61
  74. sglang/srt/models/exaone.py +1 -0
  75. sglang/srt/models/gemma.py +1 -0
  76. sglang/srt/models/gemma2.py +1 -0
  77. sglang/srt/models/gemma3_causal.py +1 -0
  78. sglang/srt/models/gpt2.py +1 -0
  79. sglang/srt/models/gpt_bigcode.py +1 -0
  80. sglang/srt/models/granite.py +1 -0
  81. sglang/srt/models/grok.py +1 -0
  82. sglang/srt/models/internlm2.py +1 -0
  83. sglang/srt/models/llama.py +1 -0
  84. sglang/srt/models/llama4.py +101 -34
  85. sglang/srt/models/minicpm.py +1 -0
  86. sglang/srt/models/minicpm3.py +2 -0
  87. sglang/srt/models/mixtral.py +1 -0
  88. sglang/srt/models/mixtral_quant.py +1 -0
  89. sglang/srt/models/mllama.py +51 -8
  90. sglang/srt/models/mllama4.py +102 -29
  91. sglang/srt/models/olmo.py +1 -0
  92. sglang/srt/models/olmo2.py +1 -0
  93. sglang/srt/models/olmoe.py +1 -0
  94. sglang/srt/models/phi3_small.py +1 -0
  95. sglang/srt/models/qwen.py +1 -0
  96. sglang/srt/models/qwen2.py +1 -0
  97. sglang/srt/models/qwen2_5_vl.py +35 -70
  98. sglang/srt/models/qwen2_moe.py +1 -0
  99. sglang/srt/models/qwen2_vl.py +27 -25
  100. sglang/srt/models/stablelm.py +1 -0
  101. sglang/srt/models/xverse.py +1 -0
  102. sglang/srt/models/xverse_moe.py +1 -0
  103. sglang/srt/openai_api/adapter.py +4 -1
  104. sglang/srt/patch_torch.py +11 -0
  105. sglang/srt/server_args.py +34 -0
  106. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +4 -4
  107. sglang/srt/speculative/eagle_utils.py +1 -11
  108. sglang/srt/speculative/eagle_worker.py +6 -2
  109. sglang/srt/utils.py +120 -9
  110. sglang/test/attention/test_flashattn_backend.py +259 -221
  111. sglang/test/attention/test_flashattn_mla_backend.py +285 -0
  112. sglang/test/attention/test_prefix_chunk_info.py +224 -0
  113. sglang/test/test_block_fp8.py +57 -0
  114. sglang/test/test_utils.py +19 -8
  115. sglang/version.py +1 -1
  116. {sglang-0.4.5.dist-info → sglang-0.4.5.post1.dist-info}/METADATA +14 -4
  117. {sglang-0.4.5.dist-info → sglang-0.4.5.post1.dist-info}/RECORD +120 -106
  118. sglang/srt/disaggregation/conn.py +0 -81
  119. {sglang-0.4.5.dist-info → sglang-0.4.5.post1.dist-info}/WHEEL +0 -0
  120. {sglang-0.4.5.dist-info → sglang-0.4.5.post1.dist-info}/licenses/LICENSE +0 -0
  121. {sglang-0.4.5.dist-info → sglang-0.4.5.post1.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  sglang/__init__.py,sha256=8J5PHcfRJul4R1NJnt0BtW05aVQNWqp8PyalLPOTZCA,1669
2
2
  sglang/api.py,sha256=vHiKBg8wwIdmrpnGclop5BzJ-1Q88emrlrfLwNCHg98,7010
3
3
  sglang/bench_offline_throughput.py,sha256=OQb-AjL4UNymmir02ht43uzgaNsnO_I11nXSowKMqBI,13841
4
- sglang/bench_one_batch.py,sha256=Fp6HBBJHrw672Q1gnklJ7dYboYYjR92D2fNCvbrM3M0,17935
4
+ sglang/bench_one_batch.py,sha256=upXSMMlhO53J2cAP9lAb3KWt0nTEUdkNRWGk0lobS08,18887
5
5
  sglang/bench_one_batch_server.py,sha256=8VYNhaQbWGP8TkNVuy_sPjD5FiuVZHamtGRWKwa-Z-Q,5962
6
- sglang/bench_serving.py,sha256=DKCg7l1uaDlKUB45AIpFaZLesA-sRTV-meJ-50sucXE,57410
6
+ sglang/bench_serving.py,sha256=ek6D6uw0IlsMb0lhg57rBq7q4au7Os78GUMXopy0Wfk,57702
7
7
  sglang/check_env.py,sha256=76itNLUw9KlqbiY1BI4u4YaMZaqyCNcrCLUIb6aHflM,8396
8
8
  sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
9
9
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
10
10
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
11
11
  sglang/utils.py,sha256=GIcgiRHkZ-gyPxXOdn1qFF41jkg4-YdDxbPc4mzO-qk,16159
12
- sglang/version.py,sha256=ErkLkI2TDBX1OIqi2GGa20CPeu4ZculEi-XffRbLU6M,22
12
+ sglang/version.py,sha256=mfIdQ0Yo6GF2VyWIDGyBUw_42D590eNsz05qnm3UXM4,28
13
13
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  sglang/lang/chat_template.py,sha256=MwNL5dNTe8g_l2ljZubnrazEgT2xEv-9O2D0Ezwxy4I,19658
15
15
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -30,15 +30,15 @@ sglang/srt/code_completion_parser.py,sha256=HhEUzdL-FVBsOot9tKDKA1l8Gdx8qsF1RRg-
30
30
  sglang/srt/conversation.py,sha256=WP72AZrZpiqc5RowucT2tW3jVCb1pb4veW_kpwYS4yY,28785
31
31
  sglang/srt/custom_op.py,sha256=bIZ__3FiZvkbsN9O_jeLy_49X7ZbYbw0VxoL80uWwaI,3715
32
32
  sglang/srt/function_call_parser.py,sha256=buYENeNEP5bhsvD424yGCa9wOqSfVOZSRn6zLiSJp5I,23733
33
- sglang/srt/hf_transformers_utils.py,sha256=_QYTl9LpU0jmKPlYooHi1etwMvb5v40JIrG_t_Fx06w,9215
33
+ sglang/srt/hf_transformers_utils.py,sha256=N2f-gA8yUq-UP_TJT276gNbDNzmddWsmWnq3px6TIj8,9342
34
34
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
35
35
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
36
- sglang/srt/patch_torch.py,sha256=Fw_QGqSsAdyCumi0dT2HyPlppf9xd3-tQPvwuBZfhxU,2625
36
+ sglang/srt/patch_torch.py,sha256=OUPCGQSQz3MVZB1zZ_Eq8lXiw0uIKJ_HWjqQolI8FsM,3088
37
37
  sglang/srt/reasoning_parser.py,sha256=45xsU9RCPfyG4_Zx4y3-JPyNgAtrqwKI4j5R2NT4g1s,5594
38
38
  sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
39
- sglang/srt/server_args.py,sha256=eb3zJIpljzHK_ajp_zJRgwRUM_00-S-7K15k2opaBK4,52467
39
+ sglang/srt/server_args.py,sha256=k42YCDTbEEZZShmoaorQGNRwMxYACDSpvGW2toTb2DQ,53778
40
40
  sglang/srt/torch_memory_saver_adapter.py,sha256=KG3wM9-xZsSdsmORofArnNR7hH55GEyFxaderCDcK9w,1853
41
- sglang/srt/utils.py,sha256=UyNimlcXkBG5cp-6ah3GaBBTUCpOA-OZh0NaNNh7QgA,57507
41
+ sglang/srt/utils.py,sha256=KAEwcTWJZInclD6tGvIEDcXpVW726l6n86FcbrHoCng,60992
42
42
  sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
43
43
  sglang/srt/configs/__init__.py,sha256=vulncVn70WqIT6s0HaB8p_Q6FjOiaLwNZWpoJS9FIuQ,399
44
44
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
@@ -48,7 +48,7 @@ sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51D
48
48
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
49
49
  sglang/srt/configs/janus_pro.py,sha256=-QtJ4ZGZiAJb0AkOEcuCHzIKLw23nF8nRk3rdCcoUO0,19016
50
50
  sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
51
- sglang/srt/configs/model_config.py,sha256=ZioUnc5UzsBVEYHE_GgCofYL97MByZm2NfHikS9HwLo,20771
51
+ sglang/srt/configs/model_config.py,sha256=oMXM1CardGkJThm0KgCxxgYnwXaJXFZzmUZIM_wugeU,22046
52
52
  sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
53
53
  sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
54
54
  sglang/srt/connector/base_connector.py,sha256=i6i1TIzsz4NbSEkrdMPq-urb2sN2aLAx8dazga4gB9U,2833
@@ -58,16 +58,21 @@ sglang/srt/connector/utils.py,sha256=isTvxauz1_8no5MW6p8Bwj2H9mQkweaRO_RSwAPA7R8
58
58
  sglang/srt/connector/serde/__init__.py,sha256=MvBJ7FBJtnou-AD-EdsCdAIDCcA8kWvUNuRViILVJ-0,718
59
59
  sglang/srt/connector/serde/safe_serde.py,sha256=Iv_mFsDvk-cXuw0WTykBZ2yGxW0jb82DwVqC7r3RmAU,750
60
60
  sglang/srt/connector/serde/serde.py,sha256=n59I2MXLa7WCyN_8pEd8L-scJk7lMhmEX-GOUIhF0ZA,1004
61
- sglang/srt/constrained/base_grammar_backend.py,sha256=MzAA7oqWOZ12ndUs158FGECjtKNx_2_mDMZ7Jopb9Pk,6899
62
- sglang/srt/constrained/llguidance_backend.py,sha256=ej7wN13SzCsT310C6OIyUg2zs5jeuLl3Ocok9SP9-c4,5702
63
- sglang/srt/constrained/outlines_backend.py,sha256=UWv2xjg8x4XtoqpY8LoorlJaYOZhfDeIr5YCiFn4knA,6812
61
+ sglang/srt/constrained/base_grammar_backend.py,sha256=ljTVWpBo3bolce-E_-mtHIY2XWez4qcyDPeaIeZyIhM,7454
62
+ sglang/srt/constrained/llguidance_backend.py,sha256=Kgd-PQVBQlKWsz506OpF_xSdNBhEbvFywzICTZg21iM,5729
63
+ sglang/srt/constrained/outlines_backend.py,sha256=XbmkZSJzJnnY7k11uj8Et3StfuOiFwRs3ID4IRYAA4Q,6839
64
64
  sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
65
- sglang/srt/constrained/xgrammar_backend.py,sha256=W7_qyyQiOUwejIPCnWgJrp6ka5fy137SiJtxt3VNruM,6220
66
- sglang/srt/disaggregation/conn.py,sha256=amOujTy2jFwdfYhGxuTLAMNWGPbIKGoAwWDqKxZ06gc,1950
67
- sglang/srt/disaggregation/decode.py,sha256=5pgXeIQBBJXQpVXpRm7vAauRmy5DtIi8953dZFBAPeA,18075
68
- sglang/srt/disaggregation/mini_lb.py,sha256=upwG_4CdurUUiPuYGUO4OJQu22lDx6gnsM0xKv5QRmg,10692
69
- sglang/srt/disaggregation/prefill.py,sha256=zw8hDy6Txq_MpC5j0fndLNcKoypT2BhxTkqqTuilMCE,9053
70
- sglang/srt/disaggregation/utils.py,sha256=ebOZ3lSFVkbNtl6uUfS6sYYYVBjgmWdQLOsqIZBGgN0,1088
65
+ sglang/srt/constrained/reasoner_grammar_backend.py,sha256=XFxdZqvPofmtCeIMqR10NOyph06HwbdXfiVI8rIoV5s,3646
66
+ sglang/srt/constrained/xgrammar_backend.py,sha256=Xf4CiU30XCa_RM4bgFkCw1yLeH4wijfewUHIYjnkv-k,6247
67
+ sglang/srt/disaggregation/decode.py,sha256=qV0TU_nasIWTCXAngB7g3t5uJ1a8nmYW-KGXdIzaG0I,18624
68
+ sglang/srt/disaggregation/mini_lb.py,sha256=ZU4M7ZtdKUDzpmNMT_NDFnTdbHzoGIxXSHW2PLdnoLs,7511
69
+ sglang/srt/disaggregation/prefill.py,sha256=AeO4VcCKJ6X3c-GVY81G2aGA0bz9nNXiYNRXFOBWzWo,9954
70
+ sglang/srt/disaggregation/utils.py,sha256=gbJIFpYM8XpW4aTThPGhny79jl9aBxOIiT2swJpS_Y8,2017
71
+ sglang/srt/disaggregation/base/__init__.py,sha256=KR8xXoRCDAy2U623mfP6ujXu42m1_F9EiudjrKu2I_A,130
72
+ sglang/srt/disaggregation/base/conn.py,sha256=gpf32bhYXWm_iaYB6WcrDaJ-UoL1ZzPI_xpi5pMhRQo,2443
73
+ sglang/srt/disaggregation/mooncake/__init__.py,sha256=1vacEHmWjf7zgbMPzsXKB08FqNKNCquJdUiDlO41BOk,122
74
+ sglang/srt/disaggregation/mooncake/conn.py,sha256=LkM9X7Rf3H4hjfSUQh4Yq7icvryTrPg_tszb3_hakcg,22210
75
+ sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=qy-0HYLhaz90ompOtcOvB5jZhI97iAH1tl7dNvbleGc,3457
71
76
  sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
72
77
  sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
73
78
  sglang/srt/distributed/parallel_state.py,sha256=hoTgLYfHIKMb_tSwBTauuusJZ8oY9BsiubTTOF8UfIw,50713
@@ -80,46 +85,48 @@ sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--Zq
80
85
  sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
81
86
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
82
87
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
83
- sglang/srt/entrypoints/engine.py,sha256=1ML85N-iF22n2Id2dpbYrKxxfkqnVUXP26kEGUfcA2E,21146
84
- sglang/srt/entrypoints/http_server.py,sha256=VM-gVwW-Ef_SikxoVSE06yydlLo6TGJykjKgffnGoXY,27104
85
- sglang/srt/entrypoints/verl_engine.py,sha256=PypBCkUJYy7iP3mKB-W0KYkjZzs4Rq6DqxNgG_nBZaM,5903
88
+ sglang/srt/entrypoints/EngineBase.py,sha256=xoyvp6XAeDLY2_Q2Ng33H-fRhrXHv2ldJJKd-HuDhqE,1870
89
+ sglang/srt/entrypoints/engine.py,sha256=cO-Yq5i_hrn_yaAuhkHKkUUVXQmHXcTV4B-l76LjbwU,22137
90
+ sglang/srt/entrypoints/http_server.py,sha256=wYjyyiajP6SWa3auZHZIUJv30zioB0IwdFKXHlyT5zo,28431
91
+ sglang/srt/entrypoints/http_server_engine.py,sha256=ihA6y3GXRs28Y9U3SgdQcJQjnw_SVIby7QrVgiafX04,4846
92
+ sglang/srt/entrypoints/verl_engine.py,sha256=sqQKt-HnEdCfR7CkiyskY7E_BeQMxASMDe_Hq91ni1I,6949
86
93
  sglang/srt/layers/activation.py,sha256=1ykXZO0BGz7DFVE-EK26b02I5AgH2IuU4PQB6oUcF4M,6003
87
- sglang/srt/layers/dp_attention.py,sha256=fC1kaYkHGoFjZ2KHTzPFW0e93El-XLRt7ZidkXYIVhE,7595
88
- sglang/srt/layers/elementwise.py,sha256=y2mQqjbF2FmFtNYBk5ecTyaj3ELoZyz-rWPY8rrxCtk,13765
94
+ sglang/srt/layers/dp_attention.py,sha256=Tfw2BydGPAeLcDlyl0jzBLOtkiygJhgAcf63RGZhrEE,7535
95
+ sglang/srt/layers/elementwise.py,sha256=XCrR2i-9dP-H6jQo2zUuquwZrsl_wEQqj5Wxk6WUf7o,13987
89
96
  sglang/srt/layers/layernorm.py,sha256=189bORMggKhYcEYEjl6JRcuIoUPllHo3SheoH6YiORY,4546
90
- sglang/srt/layers/linear.py,sha256=HYIGxpRYL6x-jNOkyNtGAw5Ak9Nq8jkntddgTBER_1w,51486
97
+ sglang/srt/layers/linear.py,sha256=etyzpgJbOUWCiu8abUovZQ8IFJ3ObeqOVHocBzFZOP0,51517
91
98
  sglang/srt/layers/logits_processor.py,sha256=Vp8ibljVEezTr54xzeOcjiJR7JdYO8ItkO5nLIIMVu0,24206
92
99
  sglang/srt/layers/parameter.py,sha256=0OTMtmsNds42e3z3wHTRJiUfxCWFwSL6DHrqgeTgGt8,15151
93
100
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
94
- sglang/srt/layers/radix_attention.py,sha256=4xRq0w9yDfAVdNlBToQpmc7irq-pomJm-GlIfMtpYtk,2328
95
- sglang/srt/layers/rotary_embedding.py,sha256=1nznPQ6EfVMDSRabKqifEE2xtMFwSri-kEepMaWdZeI,45340
101
+ sglang/srt/layers/radix_attention.py,sha256=VeE8wX-8eNaSfw2JYzrek7EjgdVltEujnkMa_u92hCc,2865
102
+ sglang/srt/layers/rotary_embedding.py,sha256=qvRNHWJxsyz897ntsEEr9iTj9ZNV_HgWUH7PvBE9ydw,45738
96
103
  sglang/srt/layers/sampler.py,sha256=yipSyN5UWGwGS-BC-WzWMmelys4CCDtK_8b1OpaK6sM,11622
97
104
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
98
105
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=QUxd4sELx6p3dHvEKmccPZ-phdd_9EjNdwjH3SJ9zxI,22238
99
106
  sglang/srt/layers/attention/base_attn_backend.py,sha256=X_GIbQuU9njtUEGdUP7E_KRhmGxj3UyPHNESlL3QaQ8,3264
100
107
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
101
- sglang/srt/layers/attention/flashattention_backend.py,sha256=ORtcSJUDbV2qfKGkq9ohiy8JJ1SU9R2I5fSMizF4EhI,42572
102
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=3fxS2NQzCBw7h_gLxBjHcyDkf2quWqBxr_N01lYmfJo,45865
108
+ sglang/srt/layers/attention/flashattention_backend.py,sha256=QKr_X_7fp96cbghJJj3qlzfN_ZeIZ-0a4wUMcSwwj-o,49327
109
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=QqHbVoXp9LqoVvoGebXoo9GcrU7LaRRRh5sG93Daa0s,46277
103
110
  sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=pnVhvVEK87iFW8gUb1G7X7c1tqro8R2DSEOFCnlV8Bo,30301
104
111
  sglang/srt/layers/attention/flashmla_backend.py,sha256=1RPFNtQOBw6BWxIjrzfJgA9Nx92udLbR-S5KXmqjxS8,10536
105
112
  sglang/srt/layers/attention/torch_native_backend.py,sha256=KABmBrMqKa4x08kkQYdIcZUGydvmaVJIUfo3y8jhFHI,9270
106
113
  sglang/srt/layers/attention/triton_backend.py,sha256=cyxOaUU1CNhaEezJH9j0dd20cwxwIVGGN3jNXFTVkIY,26714
107
114
  sglang/srt/layers/attention/utils.py,sha256=J9mA-cbZT3uTlaKXo0HEAaeMei_TS2o4McTna9LVDCE,2750
108
- sglang/srt/layers/attention/vision.py,sha256=hUUkMyhl2WbKAY8ykvolBWgIMHuihiZ7w_UkJVxUQUo,11890
115
+ sglang/srt/layers/attention/vision.py,sha256=H7dQofAlZjC48Dr6bo4HBsZSLDhdrbHKChyGejeaEGo,11886
109
116
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=DPu_aCPgwPqKWZPEQmp_xA7MPbpV2ip-MEICCB470Ao,19120
110
117
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=BXUY8ARHBF2s9x9waiEwfZwcMgvuaJA0gxb4OeUZ_tY,31167
111
118
  sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=hbBvnhc2zqu-E3HNROVXyNOZbtDkVRuFus-yTjmE0Sg,13668
112
119
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
113
120
  sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
114
121
  sglang/srt/layers/moe/fused_moe_native.py,sha256=bf0po921lY9xnlZivdJly0bGIYFlLqp5v8Mz7tG5bdg,4451
115
- sglang/srt/layers/moe/router.py,sha256=gvyK7hXlujfCZCmAIFc3oxfgjuAjzlpPe3mp1Blc6Y0,10419
116
- sglang/srt/layers/moe/topk.py,sha256=iUb-64CaNAUfvBZ1pkgsedcLRQs2sVSIzQ5300WmdXI,10242
122
+ sglang/srt/layers/moe/router.py,sha256=5Aeqoix_AS4uymb665OJE904wVSBkQeFdZP4e7KKPvg,10530
123
+ sglang/srt/layers/moe/topk.py,sha256=xjkfC50rapfX27nI2078SoKGXAo82waQVArPVDH8Ehc,10843
117
124
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
125
  sglang/srt/layers/moe/ep_moe/kernels.py,sha256=ijqRzS-tb0LGnDU5hW-g0JH104ppADrWaUIDGxb9Feo,22919
119
126
  sglang/srt/layers/moe/ep_moe/layer.py,sha256=1TmWnxv-bW1Qbgru-V-vGnt3ruuTIwHQy0Y5ZA_xzvE,36824
120
- sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=jnr6KSM8YooftTjZ3gYe0eWpOd1dmkXqk4hKRvLTwCo,19708
127
+ sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=zQV7Qr-Zrcr3D3efVvZepRQM02bj5djHPsijPssavk8,20430
121
128
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
122
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=sjAXnjUmLXPpvFFL4VShBce_9xygWY2twAQJ74OJ_ZQ,54500
129
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=6WbcChGdZmSXl5_WlpC0w1cn_QH69mvICQt0pMBO_nk,55474
123
130
  sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=43-UL9KEMoaiC0cRSzWFbg2PADtcoxfZqjZ6TOvQ7Vk,24551
124
131
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
125
132
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
@@ -165,6 +172,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=43-UL9KEMoaiC0cRSzWFbg2PA
165
172
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=MW7KzNa7DcKm53u2Jh-mnb93A3ICefgQHkdKONJMfew,3255
166
173
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
167
174
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json",sha256=JEOXj48phwoumZWBwNq9TpqxVwIrbnfot4QfAdzvLJI,3249
175
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Lqom_VMIPduSZTZQdeL2Wl_x3r9q6RmI9bojJrYwQZ4,3255
168
176
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XmKFaMheq7NNrsvYCJteul0w809l_l460ZiDQC9ToGs,3262
169
177
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
170
178
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
@@ -175,8 +183,12 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=43-UL9KEMoaiC0cRSzWFbg2PA
175
183
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=Y4WpmVGlGERHoeoQNGkQ-GC-MsEtMblqnAVuDbARJdw,3240
176
184
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json",sha256=RUkd9fW9WbajF_fFIzppsE1qyWGR5aRC4Cln-BPdu28,3254
177
185
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json",sha256=Sc9xK1wtRUqIzXppbutcq-Y2e9M0DZl2OGVzzB0aQuI,3265
178
- "sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=z8Iedw3N22cWXTCPhVBBk-yZqkc30ePMzv9KkgPoOd0,3266
186
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=7YmtaXKnmX8DdYnUJ7WQFa7xjr2Yun9WIdQNoCf_K28,3255
187
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=3Zt4hbC3yJxWvP0T7K93YAPaUP8fQ1P1Wk0CGqtBga8,3259
179
188
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0HPFNkhgQ_Yd0190i1bQSgth3q4zCfBgiRQJsITO-S8,3265
189
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=4B0SmzRQ2-PsBJcFe7neM1OKfWpsbiY4x6c6COQNMsQ,3254
190
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=Piw4LN6d8QYrUahWsw3XUOtTMD1o3vHPwA94sGI56Gk,3242
191
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=3T8_rF2PEojhgTMyQ8DscXgJCWWdWfDPj4M434zWcA4,3243
180
192
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TBscr1uWwpY0FrKQ5Y3EO_Qg6I97u4f_zjnWRvoeLvE,3260
181
193
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
182
194
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=uv-RwTNZT2n264dLo4eWxUpB3g7QqUyf2MFEGiRvoqQ,3251
@@ -250,25 +262,25 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=43-UL9KEMoaiC0cRSzWFbg2PA
250
262
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
251
263
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
252
264
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
253
- sglang/srt/layers/quantization/__init__.py,sha256=yokDLpqQZ6eIIeaBZggJG-oS4h3TmroXZHLL40YykeM,12159
265
+ sglang/srt/layers/quantization/__init__.py,sha256=g5VWhdnQuyxFEcPyMhreR8QlEn3tXaCH34QPdAdufZQ,12369
254
266
  sglang/srt/layers/quantization/awq.py,sha256=VImnVCU_QBLFba6S88T0dJ-vLy6SMm3OLIMEdllDfVI,6663
255
267
  sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScLFjB5_sywFlAfE5J0,4776
256
268
  sglang/srt/layers/quantization/blockwise_int8.py,sha256=yE8ARplbha1sW1Szl-mgsRDzGTRpEZY_zAKkCJIu680,15010
257
- sglang/srt/layers/quantization/fp8.py,sha256=J5D_KdRYiOQ4NCbjoKfYDHdIgCGMy-tQwHlTiG44pJc,41189
258
- sglang/srt/layers/quantization/fp8_kernel.py,sha256=JRalHJ-btDpzl3oXu2R_ZoJBu5TzBBmW_wKZDFs-usQ,24384
259
- sglang/srt/layers/quantization/fp8_utils.py,sha256=CDR2fLrZa_mZ86n5S2dDjYMpVCGa2n7gCXd2BYZjXcM,21391
269
+ sglang/srt/layers/quantization/fp8.py,sha256=bYj6-xRO-bXsrDDaYzA2sKTmOvznLu7ZVoAtGR7cKjM,41834
270
+ sglang/srt/layers/quantization/fp8_kernel.py,sha256=F49gP48suKwzO1QejmGSV6XrBSOgwW-bsaM-rvUB_mE,27460
271
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=ymdhxw-vMfJIzxW0uFg_iDTzlurN_R9dFXyEmYXsluI,22516
260
272
  sglang/srt/layers/quantization/gptq.py,sha256=e4rMz374-yQQqeAI77WPxfcAaRk38GeN2akEpvnC_Do,15141
261
273
  sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRn_imIw8kNgqdtb2lr7BettjgDgimbl1Rubnamjh8,11352
262
274
  sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
263
- sglang/srt/layers/quantization/kv_cache.py,sha256=rJi6amyLZsquUMo_V5iLlPMqdsGTLgxh4popN1xUHCQ,4236
264
- sglang/srt/layers/quantization/modelopt_quant.py,sha256=mne4uKF0R-K0OvWN7X5ZxD4LdXKBc6GvmpZzIW6gkmM,6969
275
+ sglang/srt/layers/quantization/kv_cache.py,sha256=-yaFTdB75T0BbvQeuIpH6rZoL3R8t6OIJVGB-xdtpCw,3492
276
+ sglang/srt/layers/quantization/modelopt_quant.py,sha256=Ff7qMv7CCWj0QY5gkDnwlQYLH1mbMtopbw8jXMLndXg,16616
265
277
  sglang/srt/layers/quantization/moe_wna16.py,sha256=3Z8Eq4_ehTN5EEotlYC09FpUNmF8VO8uv7QzUqJa0QI,19371
266
278
  sglang/srt/layers/quantization/utils.py,sha256=QqGFwRnFenOm5HfyLoS4D06_LyvNWgOggAiFtZXTpQ4,5637
267
- sglang/srt/layers/quantization/w8a8_fp8.py,sha256=XcQdgqXA3eKbAf-4_0I81Y5Nvjns3bQTocovnN8141w,6234
268
- sglang/srt/layers/quantization/w8a8_int8.py,sha256=oLURfgMpsES8qLf0CIJ-4rfQgBGf452Lo0U6tvq6jH0,8856
279
+ sglang/srt/layers/quantization/w8a8_fp8.py,sha256=wY9Ztw9RM5Vd3MHMLauy0KD8xcQ8JZUB_M4LeyE8-UU,11654
280
+ sglang/srt/layers/quantization/w8a8_int8.py,sha256=fIoSGeaL5kZNrExKAKWBgJc9hNXns_w9zP7vw2dVPHA,8892
269
281
  sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=ngKVSHfQUNSZzrLMu4Iv_4Fzt2eOoOIZKcO2RNDiwAM,25353
271
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=roqRrIJybA9YuN3kqSeoLTJhXfTHOOtJd5MkenpOL8E,25835
282
+ sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=Z2h9WQ74Umun_0wj8I5fr-ScRfuSrpSU9otI29jLAM8,25606
283
+ sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=RYX3vJ0Dh984KktTkNr1Zt578W9gp-YFY5nDiOpd7IQ,27073
272
284
  sglang/srt/layers/quantization/compressed_tensors/utils.py,sha256=mnUmKWFQUnY8bVoFHUuNVwqsfS-cefeR-ofyaihCXcY,7621
273
285
  sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=qcdRgoUNJWXqSimns-D987TW0OTk3uFuWNBX8Z6H8Fk,246
274
286
  sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=tdKJC8c3SX8T3z8JL-1YCsg4ftcv55Wxt0vZrYftpX8,1635
@@ -445,68 +457,68 @@ sglang/srt/managers/configure_logging.py,sha256=fOJaXAQ1n9m-8KPJndpsKvS885i69SMa
445
457
  sglang/srt/managers/data_parallel_controller.py,sha256=Xkj2n9uDyq7a-AVDZlfzeuNkC4ibsSftb1_bed9hgQ4,10318
446
458
  sglang/srt/managers/detokenizer_manager.py,sha256=HTfpJWMF1EImhKOnLJ96xPmYXm71xzaisLMfxg3zpgs,10111
447
459
  sglang/srt/managers/expert_distribution.py,sha256=r3o5RGI0gnV7xb60AApqKYa0oiSB37oB7hQBX7P3xZM,3225
448
- sglang/srt/managers/io_struct.py,sha256=_WYQ2x49Wc8WqmZH0Q5Te7zVrGRQkbn0ADghuDwyk7k,23852
449
- sglang/srt/managers/mm_utils.py,sha256=KxZF684q0ohUn4J4dPMdDfGtOKLyWUZ3o7yG-mGcjnY,15464
460
+ sglang/srt/managers/io_struct.py,sha256=88tlo4xLTqpb_qv2mJpJPZQxpcWCK3xW34GUij05McQ,30860
461
+ sglang/srt/managers/mm_utils.py,sha256=BdeiJG1vR89v8j9NwdHdanZlX5iiyxYWwHL5T0CjwUg,15537
450
462
  sglang/srt/managers/multimodal_processor.py,sha256=37SSZIdhdmcGaZSH2A2GLdntcbIxDUiomX6WR_BpmtQ,2132
451
- sglang/srt/managers/schedule_batch.py,sha256=oHXIHW8imrDnV0PuOjysG9qvEVYqQY9XGCTMKzDd-6I,61013
463
+ sglang/srt/managers/schedule_batch.py,sha256=4Xi7SdCiTKWraU8xQJQT1XYGMNt8TUW0dsZaNMDLutM,62509
452
464
  sglang/srt/managers/schedule_policy.py,sha256=E1qVq2G3jptKdX9nlqfayeRBUll9xB6bK8nBf3EW32E,19469
453
- sglang/srt/managers/scheduler.py,sha256=hSYFlzkr20ZCYVvPfffmmG_aQeLUx9xjDCcvudEnU3Y,80024
465
+ sglang/srt/managers/scheduler.py,sha256=89db4YxUDZyrEZAKdbrxNIWk3EPob2aqvtZK4G8E2D0,82318
454
466
  sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=u2sj6MViFTov0lVZSysZ-wph2pEqRCtCjwA1UdttZ7I,26338
455
467
  sglang/srt/managers/session_controller.py,sha256=o-ifit0n4_xHLNmyD0Ams8FxGRgxFybX-Vz1hwgr3UQ,5755
456
- sglang/srt/managers/tokenizer_manager.py,sha256=dSuYrkMNZUcI1hSNP4sw5fHV4FdndysNeRG1NsxBEjo,46163
457
- sglang/srt/managers/tp_worker.py,sha256=IFiOhbNIya-7cqgp_Yg0ZXGcsgy9YS295AfxJYjFqzQ,8833
468
+ sglang/srt/managers/tokenizer_manager.py,sha256=ncPzDWeTgqi--V2LgRrzVIGp2aPl9Dcsv3qWCLDTBE4,46665
469
+ sglang/srt/managers/tp_worker.py,sha256=khF-hXOrtF_IesOyUSjEBjb7fAh3CakdiKR7Ebj2wp8,8894
458
470
  sglang/srt/managers/tp_worker_overlap_thread.py,sha256=3_ZJ8Rq7v2ZDaRNTRu5Dy8AbqiAlJQp3IAKnn_WAwd8,9127
459
471
  sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
460
- sglang/srt/managers/multimodal_processors/base_processor.py,sha256=8ELm-cEJgFnOh8DxzsgRlCEGjewA68IygJRZGlF8Azw,8923
472
+ sglang/srt/managers/multimodal_processors/base_processor.py,sha256=5pkKBqajyBRv7uM183NnrhTMYybC0HeUzHID_OkEnfA,9859
461
473
  sglang/srt/managers/multimodal_processors/clip.py,sha256=lRc2mcuDbAhZVf-0EfkO81pqDiol9zLvTpDqtPIBQ2k,1525
462
474
  sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py,sha256=j7j1D38azudJjYthVpdz7jxQ9Z7SjwQfskpOIshAdiY,3147
463
475
  sglang/srt/managers/multimodal_processors/gemma3.py,sha256=UlkyIoc8XOw69iFBYiBYLx--pdfnM4JfCFtwRrd3w-o,2267
464
- sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=wZs4HZhPov7yvV2VU2ep4k1ANOimVqPRIs3cpC-O1I4,1820
476
+ sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=4bQZ7WByd53d1PcEgVeeXRyWnC78nO-8RsQbGWRDyYM,1852
465
477
  sglang/srt/managers/multimodal_processors/llava.py,sha256=8mac3vUUpVd12o43k1TyMaLEySZB915ks8Q5epeZmbg,6209
466
478
  sglang/srt/managers/multimodal_processors/minicpm.py,sha256=Mq-iH2j90VrGAbSaF3ayYWhTEm9RvWNI6ZhBb6G23dQ,5684
467
479
  sglang/srt/managers/multimodal_processors/mlama.py,sha256=MLiGS606LzVtdoXvjWGANx-K_7nE9J_fMVmkXN7Gz8k,1661
468
- sglang/srt/managers/multimodal_processors/mllama4.py,sha256=K6OKhSZOoaHwrRt0ZVi3gi2vnzMVHWJb5n3fUoStwIs,6188
480
+ sglang/srt/managers/multimodal_processors/mllama4.py,sha256=50Yox7TaGrrB7iPjN1dQ_UzuY41x7VLmMcRXBhTgUvE,5592
469
481
  sglang/srt/managers/multimodal_processors/qwen_vl.py,sha256=67EmFiAkvZncU-eqiiS0Q4dr3pWcfI-RofYiQnNWvu0,5722
470
482
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=NY62Zo0A0tLJ7ObRLOQqQcXCxoJUDZsK8f5U4dNQjKc,973
471
483
  sglang/srt/mem_cache/chunk_cache.py,sha256=it5SfL1FwMbrdeOH-I-Eu_i-I9hFB1xL-z_brIUoCkk,1835
472
484
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
473
485
  sglang/srt/mem_cache/hiradix_cache.py,sha256=t3zxOCg8A4uMdjrtKbSdDJBwFubqnhfGOEdSs_22Zb4,16161
474
- sglang/srt/mem_cache/memory_pool.py,sha256=v5Mzx1VuyRpZ7P3liiKKfXuw_t24kjYPpqV_ZCwGCeA,31098
486
+ sglang/srt/mem_cache/memory_pool.py,sha256=MyFVt81pCiiHoO3zRbIP_Z-KSAhHeSQGFQuL_kFL_L4,31638
475
487
  sglang/srt/mem_cache/paged_allocator.py,sha256=BrJS0vN1k-vTSgb_M8u_1KoZFRgzgR1WRyImCTq3T0U,9770
476
488
  sglang/srt/mem_cache/radix_cache.py,sha256=Lm-pco6CJ4orb9IfDpbHm5MnyK8Ya0OF1x9p88dv548,14906
477
- sglang/srt/metrics/collector.py,sha256=aCxHqgsQ6P8ZxsAvq_MoEVsr3KUvIUSOBpGYMgBxmOM,8442
489
+ sglang/srt/metrics/collector.py,sha256=zHg4twFQJvuK1mSme3-EYQa9PJryfp_u7a4RxQ5RcO0,8874
478
490
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
479
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=bDLOqlxdwRUyKitG8JyZygnm05N00q-TdNiAayG_T8o,23223
480
- sglang/srt/model_executor/forward_batch_info.py,sha256=8VI1VxSmyH26lIHnCNeGqYw2XxslbqN_cuSUIEPUtRU,19468
481
- sglang/srt/model_executor/model_runner.py,sha256=4Xi-1u1tTC34uK_DtYEaj7VtvPjDDgMzRaXeJ5kpsQE,45076
491
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=ulYmFv0mlQ4aawuFpX2qkaAVx3qE5tEYj4D7hOOEct8,23325
492
+ sglang/srt/model_executor/forward_batch_info.py,sha256=_qSMTiLxvcPIIgqRfUqG4W--OoirVY7ulcFfZqQIqjo,28689
493
+ sglang/srt/model_executor/model_runner.py,sha256=5qvLlql8rIMJUa8DTChrraq_7-s6PusnpdPctED3PJU,46909
482
494
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
483
- sglang/srt/model_loader/loader.py,sha256=AUS4SqSFghbQjs29C65lg7_zxR9h1t7N5G0gERjc0Rc,54238
495
+ sglang/srt/model_loader/loader.py,sha256=YYmtvkQw0B1qgPw0_gN-K4yy7CEYbTSR__0Dl1Fnm6k,55342
484
496
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
485
- sglang/srt/model_loader/weight_utils.py,sha256=Bkfgz6TUjkJJb8BiFxrv7FgbZFd9eW2y21jDBPdLWSo,32056
486
- sglang/srt/models/baichuan.py,sha256=iXgta-W38OWesxmXWZJ73fUvPdu51EwTQzUD5mmfJ8s,15721
487
- sglang/srt/models/chatglm.py,sha256=avLC7mjjGskBxCxy-9s0sMlAJjfFoG_y8VieR1QfDsM,13918
497
+ sglang/srt/model_loader/weight_utils.py,sha256=yKnau-wH9muczoCpDTCVIqXFqz-QJmEEySplX3bMJWk,32153
498
+ sglang/srt/models/baichuan.py,sha256=HbvlErnkCSK4pRQYCSDxMcrn-1DQyfiNoeDcnRrJas8,15807
499
+ sglang/srt/models/chatglm.py,sha256=cajLN9caBl09e0TwOFkiTTKDqwlbmHo_yS-NCjdeQW8,13957
488
500
  sglang/srt/models/clip.py,sha256=fCMtAcaKjruSIWfD4YGb4HXh6Tzp2pjpgDmp5JpwBPU,19794
489
- sglang/srt/models/commandr.py,sha256=Ug-B0QcdWZufrTybC6K5yP3MLKNsYb-vzfrqUsXYUcI,15276
490
- sglang/srt/models/dbrx.py,sha256=0Vf4yhqe8YeQuKR3P-agvYJScmHwH3-tFbyU8kv5QJM,15559
491
- sglang/srt/models/deepseek.py,sha256=Le2MXij8m4hT7QYgD0bFMFmYhbgX7SMjoXZFB8BxgyA,16871
501
+ sglang/srt/models/commandr.py,sha256=5Y_b3K0QY7D37nFGkyiGgY38RleRui_GJUYcHSuHUZo,15315
502
+ sglang/srt/models/dbrx.py,sha256=4pn_fdoATg01VEqNnIAxNEsKV5XU7gwHyd289eydq1s,15598
503
+ sglang/srt/models/deepseek.py,sha256=jZFQUVJ753qcI8_3sh6TlLF-8oYiQndQm-3No8FInXk,16910
492
504
  sglang/srt/models/deepseek_janus_pro.py,sha256=8wAzvcGdyo--3faMN4QtagT1eAZMhMFduvpCXqUS48Q,70456
493
505
  sglang/srt/models/deepseek_nextn.py,sha256=kca-2Fm2_SmqbOEFfd80pobooi1BXd1oe_4EsUM6SeI,13561
494
- sglang/srt/models/deepseek_v2.py,sha256=HJ8cuH87E_EF62YXlmYBjGVJk5P721T5M-XKMqsrbYg,62633
506
+ sglang/srt/models/deepseek_v2.py,sha256=LtaJOxEn6ZnsXkiiVdS646u7lw68TeBg3iPy2LGj_cY,70572
495
507
  sglang/srt/models/deepseek_vl2.py,sha256=RVvi_3qsfrkqMCCnjjTA8OwUc5ySutc7asAH-rUJLVo,12922
496
- sglang/srt/models/exaone.py,sha256=5iibqQTjpgosuGRt2rj2lWR0ShK2XGhbdFSnOWpaQss,13386
497
- sglang/srt/models/gemma.py,sha256=3XxMDOKz4xMP6VzWoW8f0hmMf8LP8fhzMw5prsYC4e8,12602
498
- sglang/srt/models/gemma2.py,sha256=MDe_HNkSpEJpw426tbx3fp271GBlSVEuhIdGeOB_jYA,16356
508
+ sglang/srt/models/exaone.py,sha256=rX7J0xFt9TSt6tMIhnYMkb5KDnqTJIV4BtjPLFwQ8_8,13425
509
+ sglang/srt/models/gemma.py,sha256=4cdrPISg1VKnsuI-QPTpYvet4BrX8BMKvCIN82iLskw,12641
510
+ sglang/srt/models/gemma2.py,sha256=kqtwdo93GWKm2iBN29RoIRH2ggRm-K_80LM5btgfBLo,16395
499
511
  sglang/srt/models/gemma2_reward.py,sha256=V8U3_ADUHWPdOwvEe1jhGW-oJmBgL8t1TY3-67Ksv2A,2618
500
- sglang/srt/models/gemma3_causal.py,sha256=nKO-DRtvcXn5bHquxILgnp0fJT6usoB81W8kPVdYsA0,24934
512
+ sglang/srt/models/gemma3_causal.py,sha256=G_vNxBZBJOVigZg8B3XGtbSmak9LEtSFFr9uQMXBHQY,24973
501
513
  sglang/srt/models/gemma3_mm.py,sha256=tWX2vIdRf5zePwKMLbb0d24DUWoTdjmdXnxIcULQJ2E,15221
502
- sglang/srt/models/gpt2.py,sha256=dAnfmsAL7JVHakryqrERR1jgL8mI1Op6nPHYfDCF7Ao,9802
503
- sglang/srt/models/gpt_bigcode.py,sha256=EAN6xAXpa8m3DcBuH1D4rTPji2oG9NSozGXSNHtE2lw,10268
504
- sglang/srt/models/granite.py,sha256=nu_Zl_PYn188gk1uYVZ76y4wwHZV7G0w7uanhqpSFUs,20813
505
- sglang/srt/models/grok.py,sha256=pQOXtpHOYVntwt5QQRLffYsnMHmMfPMmGyKMfR0k0Ic,27994
506
- sglang/srt/models/internlm2.py,sha256=4eh9WVgK4yg13IsnH5qB2xUCWnixj_aLLz7qa_4m2_Q,13017
514
+ sglang/srt/models/gpt2.py,sha256=kclhxEs8oJk1KCyhmAqo7rZqecVGGHYkc-a1WZi3aIk,9841
515
+ sglang/srt/models/gpt_bigcode.py,sha256=1D6bi8Zu760gCRZkvdLHFcg8kCkY35ARwQYaMDtYhl4,10307
516
+ sglang/srt/models/granite.py,sha256=5WOJyNYAlt5RNHSexNfPNihhSxIMd7wPzju1cTixKig,20852
517
+ sglang/srt/models/grok.py,sha256=vESZeGS4adI_JAerXIkCcTm15-CNiGeS7VHc36C6w1A,28033
518
+ sglang/srt/models/internlm2.py,sha256=RDAT9drjdgVEFmCMq99RTn3weMQFhl1NHhkhyDX8f7M,13056
507
519
  sglang/srt/models/internlm2_reward.py,sha256=ndfGmyqYZbVZ7C7rJ-v9oK3wa-EpoBGybS8MlyKZi2E,2522
508
- sglang/srt/models/llama.py,sha256=gcl2YtnM54J_fZQx2Z26LMm7vPbWN7N1CjzlaBEA3zk,24893
509
- sglang/srt/models/llama4.py,sha256=4WqHX6YPBrlJVA7HoQTMUfdoU_mEhpWSgoFaeKdhdCE,15018
520
+ sglang/srt/models/llama.py,sha256=71GmA-_-CNM2kuEJplNg6tfWbjCW31EzkeVIk5ZwNmo,24932
521
+ sglang/srt/models/llama4.py,sha256=JIVS5Q1lnmEpAHDI487gKO_9xfTCehSpzInNQeCg8JU,17940
510
522
  sglang/srt/models/llama_classification.py,sha256=4QWTFaUZIFKYZvEzs8bx8VkOZNIwdYCLrnwrdAw4QK0,3108
511
523
  sglang/srt/models/llama_eagle.py,sha256=OB2lKsjn7BcfCZljklnhk83me8j0PuQmYLou7baNcq4,4866
512
524
  sglang/srt/models/llama_eagle3.py,sha256=v3bftBVDIGjnzngQYnu19cy0J_3w7yruHqLP5nsAQDM,6642
@@ -514,34 +526,34 @@ sglang/srt/models/llama_embedding.py,sha256=zq-_lNu35VBFc7eemiam0zdkGIE8fzrgk5OW
514
526
  sglang/srt/models/llama_reward.py,sha256=LF2nqMV5XOrljGjAwJg43mBv3z6Q040I2EYlgZeCp8k,4681
515
527
  sglang/srt/models/llava.py,sha256=KMwNNrlMuMaKEOZMDRBKBQbe6uctpKTLc0zOceyGC34,27242
516
528
  sglang/srt/models/llavavid.py,sha256=q0lHlRnoYHKJZsWnkIQdd6dYAQ26t7XsmrqA0zDGmZc,12829
517
- sglang/srt/models/minicpm.py,sha256=-ot45U_Bv4x85JdbIAQXoxa1sF-ZDkBk8flU-Ruli5Y,14652
518
- sglang/srt/models/minicpm3.py,sha256=sRHPFUH636GIY94B-hpAN2MSzYT1pzLPVypTNjUtttY,26270
529
+ sglang/srt/models/minicpm.py,sha256=m5HFsSJj0Po09LY9R6qj6K4gceqWDMOePz3NDGgMGT4,14691
530
+ sglang/srt/models/minicpm3.py,sha256=ZQpk6j2UjtVDR5gA0_jGYvl5Vsvm7NBH7xkpNjqgGw0,26348
519
531
  sglang/srt/models/minicpmo.py,sha256=kJnp8UwJTV7kXEpuVWA50ecRsuZyFedHlwkprix8tag,75619
520
532
  sglang/srt/models/minicpmv.py,sha256=79zZn3co9r7SERatx49EuHRoLWRiy6qeaUFgjDWJo2I,40571
521
533
  sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
522
- sglang/srt/models/mixtral.py,sha256=6Fse2J-20IMylP-yzpEihIinaH37TmmslATbLcWBRYY,14926
523
- sglang/srt/models/mixtral_quant.py,sha256=MSa6UKPbgv8Rn8Iv8o1dQhcstAHLNQzE0eepFx_hYSw,15221
524
- sglang/srt/models/mllama.py,sha256=SsK_cEolaeoXh_HkyXsSF2ueYR3sPv1NvnGH2k6Aqx0,38461
525
- sglang/srt/models/mllama4.py,sha256=E2mCxJ1zCt6Io4LL4Rtt5uqMj7Jy971234ZcuyJZxSo,5800
526
- sglang/srt/models/olmo.py,sha256=FJk8A3T3TF5QcTV6rMP8np94QtvxpMWlgCsv_5VwpVE,12632
527
- sglang/srt/models/olmo2.py,sha256=U0ScFzWazOrb_Q90sfXkpVNAsXT-pgZbNgGh80R40VE,14288
528
- sglang/srt/models/olmoe.py,sha256=tx5OKWLOr6_pohe2eBcIodCmcuSjtpteHq_tG_QVYCY,15910
529
- sglang/srt/models/phi3_small.py,sha256=6p-5EBbwN3FmhoL0VNrNb6VP7HqIhYgVBfQ98L6Rjjo,15469
530
- sglang/srt/models/qwen.py,sha256=edS0UYq6AoHZdYUJtQa5wyFNzZMW0JAMmBulH2uheaw,10719
531
- sglang/srt/models/qwen2.py,sha256=2C5wJXPsaETMGOojZfQ3v2LmqxtVldxu6upZq7ZTqB0,16142
532
- sglang/srt/models/qwen2_5_vl.py,sha256=azSqNvzEQT4ykx8f0X8mfh2LTSu2OhO0PEES0K09pro,24329
534
+ sglang/srt/models/mixtral.py,sha256=zQHCL_ZMKmLR7jitpEw8n7Rv6xhxUJzSXklsw6auh2E,14965
535
+ sglang/srt/models/mixtral_quant.py,sha256=-kQw9r8KcLdO8SNN9RKXzrGq9Q2Al9l9cWHi1VrZSRM,15260
536
+ sglang/srt/models/mllama.py,sha256=jYV5ckyuJN5XU2VXjUgV1i-Yz5rZDQ-6OYsNZvUTJjo,39775
537
+ sglang/srt/models/mllama4.py,sha256=65_YDBaNSeJJuigz3sZKvsq25ZGO17MQIoya8ukJgRA,9086
538
+ sglang/srt/models/olmo.py,sha256=7-q_fA6XXdG7kPUjpUzYkzMUWJobuSjhqjYw9xSUs_c,12671
539
+ sglang/srt/models/olmo2.py,sha256=azmljhJF4ivcQfUtfsAUxq3ducE4tRKTL6iwe0IKYMg,14327
540
+ sglang/srt/models/olmoe.py,sha256=TMzt-yB891bvA4X50xL0NjNnFYSx9imlA7N1EG8KNK0,15949
541
+ sglang/srt/models/phi3_small.py,sha256=UbqZvpwWolXUPd0zbKgbL93yVXUY1n4kXJLgIe_gjaM,15508
542
+ sglang/srt/models/qwen.py,sha256=xYkVmMZS2uMqWhfndc8EYm0olpKFnggfuMp_6aobVi4,10758
543
+ sglang/srt/models/qwen2.py,sha256=fYE5fkyRYTEVVl8XnQO2-ybj4ZhNtM7Kn12AQt39EDA,16181
544
+ sglang/srt/models/qwen2_5_vl.py,sha256=uNnYhY8x-9H1GzUJkj7lUtR5d-0yMRNWUcT7-4qPlMU,22555
533
545
  sglang/srt/models/qwen2_classification.py,sha256=dGrMm4ebd30_lBhHOhaV57ig2iOTx3nqB4GEzsrRIM8,2747
534
546
  sglang/srt/models/qwen2_eagle.py,sha256=Iz0HWL2FgSD3FqoFhfYmbIZeEYkPTJ96lYbkncmHJX4,4644
535
- sglang/srt/models/qwen2_moe.py,sha256=9cLOPHBpwdID92Ed1CEjMUxMxcWX83lklunHbyK4To0,18206
547
+ sglang/srt/models/qwen2_moe.py,sha256=GhDR7pP_G0NZ2HkaFVrBZnbqB0RxxNnH-8HMLwrweE4,18245
536
548
  sglang/srt/models/qwen2_rm.py,sha256=-mQXDEv11p-I1HXgYLTtY6ROem6UYorO958WsDrzsgs,2837
537
- sglang/srt/models/qwen2_vl.py,sha256=HMA6bww6bCYp7hTPUqSOigSCQRbhonKjTS6lxakclAM,22092
549
+ sglang/srt/models/qwen2_vl.py,sha256=NCG85isoPkepv5RU-eLh44rCHPhfT3bu7pifNdBEsVw,21612
538
550
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
539
- sglang/srt/models/stablelm.py,sha256=w93fNXpDwQbuKi4tdeo0bsXFZrMZVY4_pgNL0E5RErQ,12242
551
+ sglang/srt/models/stablelm.py,sha256=0x_31uIr3WcWwecdPAI3ek9KkyKBJS7VwknTk2y0gjY,12281
540
552
  sglang/srt/models/torch_native_llama.py,sha256=5tfFSMAXB3ScToqTALtCXa8Oo-qPCJh-KQCNB6QOlNA,19293
541
- sglang/srt/models/xverse.py,sha256=I7ivNsk6NRqPxlMUmdclpzDCvhAnWbv_GOj01MKHJrQ,13996
542
- sglang/srt/models/xverse_moe.py,sha256=xLwn5pRwQrvj7zMmwl3o49m7xILb2ACRdWvm9hY8LDc,16743
553
+ sglang/srt/models/xverse.py,sha256=DsNVI9JpzN4jj0Ry6aTrj7r-xq5YLOoDX2kH4YLJA-I,14035
554
+ sglang/srt/models/xverse_moe.py,sha256=7KCM2-j12towDMNvXkuuYiBOmNauH6NG4Ip40x0khqA,16782
543
555
  sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
544
- sglang/srt/openai_api/adapter.py,sha256=DRHA38G0T9EV2npsKZPBBH4RGJocjZtIov3U5d5VDX0,71919
556
+ sglang/srt/openai_api/adapter.py,sha256=DaSU4Pri70s3ZeMHeVzsnKjd8dA9lx_HOmpVs1TEepo,72095
545
557
  sglang/srt/openai_api/protocol.py,sha256=Y8PFFhLbzhpoERM6-WsTkm-ZuGcE-3tfenh9e-AC1vc,13374
546
558
  sglang/srt/platforms/interface.py,sha256=hym3iooBB4C8if5hDZezgVN6h4NIOu7sg2ZUBIV6XmM,11246
547
559
  sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
@@ -553,9 +565,9 @@ sglang/srt/sampling/penaltylib/min_new_tokens.py,sha256=rdU_D7RoIcrQPhysNQEzmr4T
553
565
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=XM-Lm6u7gYPtMZrTIc0FR4QxNZxBH5s_Cj82umyCzYk,5721
554
566
  sglang/srt/sampling/penaltylib/presence_penalty.py,sha256=NRh10AJrrQlGJ6S-enGdRefrTrWpyqrSm-aNnyqQNQQ,2119
555
567
  sglang/srt/speculative/build_eagle_tree.py,sha256=SFQ3eHbhfNxOdxgqDP5wSV_ZlIVqLw7VivycNZ963N0,11690
556
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=Bcsp4g0VvBmsrclkgKq512skfw3hkO2zkHX_91pBaAI,9252
557
- sglang/srt/speculative/eagle_utils.py,sha256=0kxQ69XNKO52qgKz-afO1aNF5Tbf5g1HHB7GMuUROG8,29074
558
- sglang/srt/speculative/eagle_worker.py,sha256=TysB0F6tFjblIjqoD2nlKQPNBMszDilsII7-mFWFjmo,26999
568
+ sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=FP-Dc6K4zaL2KQA8QsNccBM8TXnwREh1I2iPL9KHo8I,9252
569
+ sglang/srt/speculative/eagle_utils.py,sha256=mv--nBUgAbqP30pU3aGEMwQIHBwwevETUMQSZAelApE,28721
570
+ sglang/srt/speculative/eagle_worker.py,sha256=Qvg3B40GKH6vjyrZ9SmiVyW6KbuRJJHYXbnRCvSz3aE,27016
559
571
  sglang/srt/speculative/spec_info.py,sha256=rhaKG0TzyF9XZYHEWp1jccwTBohSNsUDvxHFtAoOl18,709
560
572
  sglang/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
561
573
  sglang/test/few_shot_gsm8k.py,sha256=7VLbWl4nCQs1wjtW4q-46jf9jUCycSs5Iw8v7sUSzBw,4284
@@ -570,17 +582,19 @@ sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWU
570
582
  sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
571
583
  sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
572
584
  sglang/test/test_activation.py,sha256=GeTIJHxlLQfW3kM-X1FGa8Sa3dSGKHEXl5wEy-hfGis,1489
573
- sglang/test/test_block_fp8.py,sha256=IqdQKt23annq_QR1gwVX0vzdMyWTEBLRhmPiLMemKI8,14458
585
+ sglang/test/test_block_fp8.py,sha256=6Ux1_E6EWdY184n8tiYOCwbyHVAUEqz9lMhSUDLIOC8,16292
574
586
  sglang/test/test_block_fp8_ep.py,sha256=N1rvqbPErBaFFpeAw8TLYXGNZOoG7cfIBP2p5XbSyMo,10806
575
587
  sglang/test/test_custom_ops.py,sha256=4X3-odkJntwNtBAuKtCbYHu6peIP6LaI_VwLw7kmDx8,5550
576
588
  sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
577
589
  sglang/test/test_layernorm.py,sha256=2GMWqqNDuGvSMSsEBF5eDCzwVSYA9E6hGhRo6s4ecKg,3764
578
590
  sglang/test/test_programs.py,sha256=VZ3vXtUDBnXz0M7gFdDH8hXg9Wa0j_qI8CVqjEgRN_E,18877
579
- sglang/test/test_utils.py,sha256=jUkIDxJ7I8hCPk0XF7F_IWJkOtn6O7eXJG5pI0cduwo,30463
591
+ sglang/test/test_utils.py,sha256=Y7XMx8-BTQJr6a90qRVpK4x9Lkl_p2WyL0VwFNHxhPs,30530
580
592
  sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
581
- sglang/test/attention/test_flashattn_backend.py,sha256=OxS1KsPs19nwZcDtdURj7_liT1cIfEXb6W4FH9KMaaE,10808
582
- sglang-0.4.5.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
583
- sglang-0.4.5.dist-info/METADATA,sha256=dFvXPJ-aE-juLKgxD5l8wflGgO1cHg2jHjScLX_Ftjw,25061
584
- sglang-0.4.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
585
- sglang-0.4.5.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
586
- sglang-0.4.5.dist-info/RECORD,,
593
+ sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqngBmRWXFmkl5NnuK1GM,13914
594
+ sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
595
+ sglang/test/attention/test_prefix_chunk_info.py,sha256=er0i3KGHMkw-4UZB1GCFd4oYwRcXfU5wpO1ORqpNGGA,7626
596
+ sglang-0.4.5.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
597
+ sglang-0.4.5.post1.dist-info/METADATA,sha256=602BdUHYnIS2M5riEnyqkhTpEKDRbH0J0ubdy1FK8fg,25571
598
+ sglang-0.4.5.post1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
599
+ sglang-0.4.5.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
600
+ sglang-0.4.5.post1.dist-info/RECORD,,
@@ -1,81 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- from enum import Enum
5
- from typing import Optional
6
-
7
- import numpy as np
8
- import numpy.typing as npt
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class KVArgs:
14
- engine_rank: int
15
- kv_data_ptrs: list[int]
16
- kv_data_lens: list[int]
17
- kv_item_lens: list[int]
18
- aux_data_ptrs: list[int]
19
- aux_data_lens: list[int]
20
- aux_item_lens: list[int]
21
- ib_device: str
22
-
23
-
24
- class KVManager:
25
- def __init__(self, args: KVArgs): ...
26
-
27
-
28
- class KVPoll:
29
- Failed = 0
30
- Bootstrapping = 1
31
- WaitingForInput = 2
32
- Transferring = 3
33
- Success = 4
34
-
35
-
36
- class KVSender:
37
- def __init__(self, mgr: KVManager, bootstrap_addr: str, bootstrap_room: int):
38
- self.has_sent = False
39
-
40
- def init(self, num_kv_indices: int, aux_index: Optional[int] = None): ...
41
-
42
- def send(self, kv_indices: npt.NDArray[np.int32]):
43
- self.has_sent = True
44
-
45
- def poll(self) -> KVPoll:
46
- if self.has_sent is False:
47
- # Assume handshake completed instantly
48
- return KVPoll.WaitingForInput
49
- else:
50
- # Assume transfer completed instantly
51
- return KVPoll.Success
52
-
53
- def failure_exception(self):
54
- raise Exception("Fake KVSender Exception")
55
-
56
-
57
- class KVReceiver:
58
- def __init__(
59
- self, mgr: KVManager, bootstrap_addr: str, bootstrap_room: Optional[int] = None
60
- ):
61
- self.has_init = False
62
-
63
- def init(self, kv_indices: npt.NDArray[np.int32], aux_index: Optional[int] = None):
64
- self.has_init = True
65
-
66
- def poll(self) -> KVPoll:
67
- if self.has_init is False:
68
- # Assume handshake completed instantly
69
- return KVPoll.WaitingForInput
70
- else:
71
- # Assume transfer completed instantly
72
- return KVPoll.Success
73
-
74
- def failure_exception(self):
75
- raise Exception("Fake KVReceiver Exception")
76
-
77
-
78
- class KVBootstrapServer:
79
- def __init__(self, port: int): ...
80
-
81
- def poll(self) -> KVPoll: ...