sglang 0.4.4.post3__py3-none-any.whl → 0.4.4.post4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. sglang/bench_serving.py +49 -7
  2. sglang/srt/_custom_ops.py +59 -92
  3. sglang/srt/configs/model_config.py +1 -0
  4. sglang/srt/constrained/base_grammar_backend.py +5 -1
  5. sglang/srt/custom_op.py +5 -0
  6. sglang/srt/distributed/device_communicators/custom_all_reduce.py +27 -79
  7. sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +2 -2
  8. sglang/srt/entrypoints/engine.py +0 -5
  9. sglang/srt/layers/attention/flashattention_backend.py +394 -76
  10. sglang/srt/layers/attention/flashinfer_backend.py +5 -7
  11. sglang/srt/layers/attention/flashinfer_mla_backend.py +1 -3
  12. sglang/srt/layers/attention/flashmla_backend.py +1 -1
  13. sglang/srt/layers/moe/ep_moe/kernels.py +142 -0
  14. sglang/srt/layers/moe/ep_moe/layer.py +79 -80
  15. sglang/srt/layers/moe/ep_moe/token_dispatcher.py +382 -199
  16. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json +146 -0
  17. sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  18. sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  19. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +403 -47
  20. sglang/srt/layers/moe/topk.py +49 -3
  21. sglang/srt/layers/quantization/__init__.py +4 -1
  22. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +2 -1
  23. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +34 -10
  24. sglang/srt/layers/quantization/fp8_utils.py +1 -4
  25. sglang/srt/layers/quantization/moe_wna16.py +501 -0
  26. sglang/srt/layers/quantization/utils.py +1 -1
  27. sglang/srt/layers/rotary_embedding.py +0 -12
  28. sglang/srt/managers/cache_controller.py +34 -11
  29. sglang/srt/managers/mm_utils.py +202 -156
  30. sglang/srt/managers/multimodal_processor.py +0 -2
  31. sglang/srt/managers/multimodal_processors/base_processor.py +45 -77
  32. sglang/srt/managers/multimodal_processors/clip.py +7 -26
  33. sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py +17 -58
  34. sglang/srt/managers/multimodal_processors/gemma3.py +12 -27
  35. sglang/srt/managers/multimodal_processors/janus_pro.py +21 -47
  36. sglang/srt/managers/multimodal_processors/llava.py +34 -14
  37. sglang/srt/managers/multimodal_processors/minicpm.py +35 -38
  38. sglang/srt/managers/multimodal_processors/mlama.py +10 -23
  39. sglang/srt/managers/multimodal_processors/qwen_vl.py +22 -45
  40. sglang/srt/managers/schedule_batch.py +185 -128
  41. sglang/srt/managers/scheduler.py +4 -4
  42. sglang/srt/managers/tokenizer_manager.py +1 -1
  43. sglang/srt/managers/utils.py +1 -6
  44. sglang/srt/mem_cache/hiradix_cache.py +62 -52
  45. sglang/srt/mem_cache/memory_pool.py +72 -6
  46. sglang/srt/mem_cache/paged_allocator.py +39 -0
  47. sglang/srt/metrics/collector.py +23 -53
  48. sglang/srt/model_executor/cuda_graph_runner.py +8 -6
  49. sglang/srt/model_executor/forward_batch_info.py +10 -10
  50. sglang/srt/model_executor/model_runner.py +59 -57
  51. sglang/srt/model_loader/loader.py +8 -0
  52. sglang/srt/models/clip.py +12 -7
  53. sglang/srt/models/deepseek_janus_pro.py +10 -15
  54. sglang/srt/models/deepseek_v2.py +212 -121
  55. sglang/srt/models/deepseek_vl2.py +105 -104
  56. sglang/srt/models/gemma3_mm.py +14 -80
  57. sglang/srt/models/llama.py +4 -1
  58. sglang/srt/models/llava.py +31 -19
  59. sglang/srt/models/llavavid.py +16 -7
  60. sglang/srt/models/minicpmo.py +63 -147
  61. sglang/srt/models/minicpmv.py +17 -27
  62. sglang/srt/models/mllama.py +29 -14
  63. sglang/srt/models/qwen2.py +9 -6
  64. sglang/srt/models/qwen2_5_vl.py +21 -31
  65. sglang/srt/models/qwen2_vl.py +20 -21
  66. sglang/srt/openai_api/adapter.py +18 -6
  67. sglang/srt/platforms/interface.py +371 -0
  68. sglang/srt/server_args.py +99 -14
  69. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +5 -5
  70. sglang/srt/speculative/eagle_utils.py +140 -28
  71. sglang/srt/speculative/eagle_worker.py +93 -24
  72. sglang/srt/utils.py +104 -51
  73. sglang/test/test_custom_ops.py +55 -0
  74. sglang/test/test_utils.py +13 -26
  75. sglang/utils.py +2 -2
  76. sglang/version.py +1 -1
  77. {sglang-0.4.4.post3.dist-info → sglang-0.4.4.post4.dist-info}/METADATA +4 -3
  78. {sglang-0.4.4.post3.dist-info → sglang-0.4.4.post4.dist-info}/RECORD +81 -76
  79. {sglang-0.4.4.post3.dist-info → sglang-0.4.4.post4.dist-info}/WHEEL +0 -0
  80. {sglang-0.4.4.post3.dist-info → sglang-0.4.4.post4.dist-info}/licenses/LICENSE +0 -0
  81. {sglang-0.4.4.post3.dist-info → sglang-0.4.4.post4.dist-info}/top_level.txt +0 -0
@@ -3,13 +3,13 @@ sglang/api.py,sha256=vHiKBg8wwIdmrpnGclop5BzJ-1Q88emrlrfLwNCHg98,7010
3
3
  sglang/bench_offline_throughput.py,sha256=OQb-AjL4UNymmir02ht43uzgaNsnO_I11nXSowKMqBI,13841
4
4
  sglang/bench_one_batch.py,sha256=Fp6HBBJHrw672Q1gnklJ7dYboYYjR92D2fNCvbrM3M0,17935
5
5
  sglang/bench_one_batch_server.py,sha256=8VYNhaQbWGP8TkNVuy_sPjD5FiuVZHamtGRWKwa-Z-Q,5962
6
- sglang/bench_serving.py,sha256=9w5bzcQlm9bo7vWTq3XUJIaKvZy5k2vl4CNjgGq2LV8,56095
6
+ sglang/bench_serving.py,sha256=DKCg7l1uaDlKUB45AIpFaZLesA-sRTV-meJ-50sucXE,57410
7
7
  sglang/check_env.py,sha256=76itNLUw9KlqbiY1BI4u4YaMZaqyCNcrCLUIb6aHflM,8396
8
8
  sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
9
9
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
10
10
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
11
- sglang/utils.py,sha256=Ns1-swsHRb8jYA0wWCQjCR_CDBWIJgyoooEng4fsqQc,16155
12
- sglang/version.py,sha256=Lhj8wFyI_a4P5gAvUzUTThFvAemjjMQEp_O1QuZa64c,28
11
+ sglang/utils.py,sha256=GIcgiRHkZ-gyPxXOdn1qFF41jkg4-YdDxbPc4mzO-qk,16159
12
+ sglang/version.py,sha256=63MgDRjbuREW1iqTD8A1g5qw4uGX6I-V8Zgt5c1nnys,28
13
13
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  sglang/lang/chat_template.py,sha256=xZ-kQpgb4-NY6QOqgRjOODZW5G4EvJnplaqYF6Ng2Ow,18952
15
15
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -24,11 +24,11 @@ sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThc
24
24
  sglang/lang/backend/openai.py,sha256=6I1udBC9obVpJG0GerbFVbWCbwLKkF2DoPdHBCnqPzs,16341
25
25
  sglang/lang/backend/runtime_endpoint.py,sha256=CAVh3X9F80t_2tkJECF__7AdCQtqDg1AHDqIoKIPnvs,16755
26
26
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
27
- sglang/srt/_custom_ops.py,sha256=QumGYYWh5qiwNdg9Uj263Rq26oFX7ldB2hcaqDEQN_k,4976
27
+ sglang/srt/_custom_ops.py,sha256=lUBwC5R2UfjFMA1EtC5Kh2IngsqBJM9IuMW46kJWcjE,3647
28
28
  sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
29
29
  sglang/srt/code_completion_parser.py,sha256=HhEUzdL-FVBsOot9tKDKA1l8Gdx8qsF1RRg-zHNpmLQ,5400
30
30
  sglang/srt/conversation.py,sha256=mzNPZX7ndgmm1E8azuK8eytN6bNCEu3WbcPReRFfhd0,27819
31
- sglang/srt/custom_op.py,sha256=vhdofFbWtpdtaA4NG4tkanWwEsfvBnjh6OPKOxmxXdU,3426
31
+ sglang/srt/custom_op.py,sha256=bIZ__3FiZvkbsN9O_jeLy_49X7ZbYbw0VxoL80uWwaI,3715
32
32
  sglang/srt/function_call_parser.py,sha256=buYENeNEP5bhsvD424yGCa9wOqSfVOZSRn6zLiSJp5I,23733
33
33
  sglang/srt/hf_transformers_utils.py,sha256=_QYTl9LpU0jmKPlYooHi1etwMvb5v40JIrG_t_Fx06w,9215
34
34
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
@@ -36,9 +36,9 @@ sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,
36
36
  sglang/srt/patch_torch.py,sha256=Fw_QGqSsAdyCumi0dT2HyPlppf9xd3-tQPvwuBZfhxU,2625
37
37
  sglang/srt/reasoning_parser.py,sha256=45xsU9RCPfyG4_Zx4y3-JPyNgAtrqwKI4j5R2NT4g1s,5594
38
38
  sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
39
- sglang/srt/server_args.py,sha256=SEZjYkfjwv0V1xQafh7Zex7NS5MVenfKWytcd4kZ3t8,49024
39
+ sglang/srt/server_args.py,sha256=eb3zJIpljzHK_ajp_zJRgwRUM_00-S-7K15k2opaBK4,52467
40
40
  sglang/srt/torch_memory_saver_adapter.py,sha256=KG3wM9-xZsSdsmORofArnNR7hH55GEyFxaderCDcK9w,1853
41
- sglang/srt/utils.py,sha256=qoo6k_-uxthkMhotF9ba_-njOV-v1AbEcJ9XdQwcha8,56116
41
+ sglang/srt/utils.py,sha256=UyNimlcXkBG5cp-6ah3GaBBTUCpOA-OZh0NaNNh7QgA,57507
42
42
  sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
43
43
  sglang/srt/configs/__init__.py,sha256=vulncVn70WqIT6s0HaB8p_Q6FjOiaLwNZWpoJS9FIuQ,399
44
44
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
@@ -48,7 +48,7 @@ sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51D
48
48
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
49
49
  sglang/srt/configs/janus_pro.py,sha256=-QtJ4ZGZiAJb0AkOEcuCHzIKLw23nF8nRk3rdCcoUO0,19016
50
50
  sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
51
- sglang/srt/configs/model_config.py,sha256=U8cynUgfijwutxZ-8hhoaWTKR5NqpXGIrGxlhbWSBlY,20556
51
+ sglang/srt/configs/model_config.py,sha256=-6SEpQw-U_KWONdNYqEe8nXopqDcDtp4B4g-RZQPhHo,20581
52
52
  sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
53
53
  sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
54
54
  sglang/srt/connector/base_connector.py,sha256=i6i1TIzsz4NbSEkrdMPq-urb2sN2aLAx8dazga4gB9U,2833
@@ -58,7 +58,7 @@ sglang/srt/connector/utils.py,sha256=isTvxauz1_8no5MW6p8Bwj2H9mQkweaRO_RSwAPA7R8
58
58
  sglang/srt/connector/serde/__init__.py,sha256=MvBJ7FBJtnou-AD-EdsCdAIDCcA8kWvUNuRViILVJ-0,718
59
59
  sglang/srt/connector/serde/safe_serde.py,sha256=Iv_mFsDvk-cXuw0WTykBZ2yGxW0jb82DwVqC7r3RmAU,750
60
60
  sglang/srt/connector/serde/serde.py,sha256=n59I2MXLa7WCyN_8pEd8L-scJk7lMhmEX-GOUIhF0ZA,1004
61
- sglang/srt/constrained/base_grammar_backend.py,sha256=q2RTH9hv2nKUF_-HVdZzEUjG6LxejPqXjvUOE1NDWaU,6788
61
+ sglang/srt/constrained/base_grammar_backend.py,sha256=MzAA7oqWOZ12ndUs158FGECjtKNx_2_mDMZ7Jopb9Pk,6899
62
62
  sglang/srt/constrained/llguidance_backend.py,sha256=ej7wN13SzCsT310C6OIyUg2zs5jeuLl3Ocok9SP9-c4,5702
63
63
  sglang/srt/constrained/outlines_backend.py,sha256=UWv2xjg8x4XtoqpY8LoorlJaYOZhfDeIr5YCiFn4knA,6812
64
64
  sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
@@ -73,14 +73,14 @@ sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUV
73
73
  sglang/srt/distributed/parallel_state.py,sha256=hoTgLYfHIKMb_tSwBTauuusJZ8oY9BsiubTTOF8UfIw,50713
74
74
  sglang/srt/distributed/utils.py,sha256=U-BSaXYjWwnfG8g-tUfBhjKt5Ug097nyHtu3g3aea_Y,8473
75
75
  sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
76
- sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=iLCrFQ3dyH_xZL3vI-paTpxeP7Rt4DszRIk8qdbtfvA,22214
77
- sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=q2q1A_Sqvrvkrgf7Tjg5XhXR1JWzzUUPHSicAKK2SjE,11022
76
+ sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=OClh322wSV28K_LpUyXX2SiasAFh7yZr6vPDG84rj9o,19913
77
+ sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=fLoptT_U0lVAqkhEg-ge53CdFSIKQpDRiqHYKwJVEZg,10974
78
78
  sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=gPjEH1-izoby5uDrfUlzNf21luPT0Ow7pJjhCRKnHy8,1728
79
79
  sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--ZqapHtvm70Lgl7obtE6ZfgeAiU,10064
80
80
  sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
81
81
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
82
82
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
83
- sglang/srt/entrypoints/engine.py,sha256=b4JmlvK4M2cWfcbc5NOlR7HjkNzDENlzL4rbSaPjBdU,21295
83
+ sglang/srt/entrypoints/engine.py,sha256=1ML85N-iF22n2Id2dpbYrKxxfkqnVUXP26kEGUfcA2E,21146
84
84
  sglang/srt/entrypoints/http_server.py,sha256=VM-gVwW-Ef_SikxoVSE06yydlLo6TGJykjKgffnGoXY,27104
85
85
  sglang/srt/entrypoints/verl_engine.py,sha256=PypBCkUJYy7iP3mKB-W0KYkjZzs4Rq6DqxNgG_nBZaM,5903
86
86
  sglang/srt/layers/activation.py,sha256=1ykXZO0BGz7DFVE-EK26b02I5AgH2IuU4PQB6oUcF4M,6003
@@ -92,16 +92,16 @@ sglang/srt/layers/logits_processor.py,sha256=Vp8ibljVEezTr54xzeOcjiJR7JdYO8ItkO5
92
92
  sglang/srt/layers/parameter.py,sha256=0OTMtmsNds42e3z3wHTRJiUfxCWFwSL6DHrqgeTgGt8,15151
93
93
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
94
94
  sglang/srt/layers/radix_attention.py,sha256=UDL0y4Zasay_Rk-_XmIU4kaGbaF26ONvEHX5EQzLrqI,2260
95
- sglang/srt/layers/rotary_embedding.py,sha256=W4w6Rn-7AgdOqrgVxWY8QJwSpiKUrRbH03bmX6hdw1I,43175
95
+ sglang/srt/layers/rotary_embedding.py,sha256=srAyHlM18hgEQKW7MotYKt1xGkqA_j1PU8g0PO5t3lA,42770
96
96
  sglang/srt/layers/sampler.py,sha256=yipSyN5UWGwGS-BC-WzWMmelys4CCDtK_8b1OpaK6sM,11622
97
97
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
98
98
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=QUxd4sELx6p3dHvEKmccPZ-phdd_9EjNdwjH3SJ9zxI,22238
99
99
  sglang/srt/layers/attention/base_attn_backend.py,sha256=X_GIbQuU9njtUEGdUP7E_KRhmGxj3UyPHNESlL3QaQ8,3264
100
100
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
101
- sglang/srt/layers/attention/flashattention_backend.py,sha256=kiiuklwtEKCgKjQPgvNnl9zjX8IGYqti4j21b-DcRGM,16831
102
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=m1rOYGr9uaxeUPA8BGPGGe8yicLJKNmRm4zxkJoQU9k,45980
103
- sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=BgbGK5iROulOco7o3vYTaPw1Ei3EaQj6Cn9W57Z4amQ,30360
104
- sglang/srt/layers/attention/flashmla_backend.py,sha256=iWzxEEIJ6g3RrovSReqAZMjjcv36qCIsPEHZGs_J8JI,10543
101
+ sglang/srt/layers/attention/flashattention_backend.py,sha256=IwrJRmBFWLUdYqe7n4Dd2rX4zu9V55qBLjCdNQEmkd0,30306
102
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=3fxS2NQzCBw7h_gLxBjHcyDkf2quWqBxr_N01lYmfJo,45865
103
+ sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=pnVhvVEK87iFW8gUb1G7X7c1tqro8R2DSEOFCnlV8Bo,30301
104
+ sglang/srt/layers/attention/flashmla_backend.py,sha256=1RPFNtQOBw6BWxIjrzfJgA9Nx92udLbR-S5KXmqjxS8,10536
105
105
  sglang/srt/layers/attention/torch_native_backend.py,sha256=KABmBrMqKa4x08kkQYdIcZUGydvmaVJIUfo3y8jhFHI,9270
106
106
  sglang/srt/layers/attention/triton_backend.py,sha256=cyxOaUU1CNhaEezJH9j0dd20cwxwIVGGN3jNXFTVkIY,26714
107
107
  sglang/srt/layers/attention/utils.py,sha256=J9mA-cbZT3uTlaKXo0HEAaeMei_TS2o4McTna9LVDCE,2750
@@ -113,13 +113,13 @@ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMP
113
113
  sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
114
114
  sglang/srt/layers/moe/fused_moe_native.py,sha256=KF0J5W5biWYWbERUHAduQFnUV5om9VaYv0I9avsVfgU,4330
115
115
  sglang/srt/layers/moe/router.py,sha256=gvyK7hXlujfCZCmAIFc3oxfgjuAjzlpPe3mp1Blc6Y0,10419
116
- sglang/srt/layers/moe/topk.py,sha256=udkEhgzXbNqk0-bSReeLMDtP_3aOtTlZa6qPn82keVs,8436
116
+ sglang/srt/layers/moe/topk.py,sha256=iUb-64CaNAUfvBZ1pkgsedcLRQs2sVSIzQ5300WmdXI,10242
117
117
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
- sglang/srt/layers/moe/ep_moe/kernels.py,sha256=E-VyToHN13f443U3UboDgLwv-t8gAcXOtPHkrI2xUpI,18793
119
- sglang/srt/layers/moe/ep_moe/layer.py,sha256=ZiS9viPdcpB7SjylNVB3DSuF3Az1jGjDPdCqd657Qf0,36990
120
- sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=SPHfAXIkZmFqXMU0a2eob9j_cni269v5jevGm2x1vWM,14845
118
+ sglang/srt/layers/moe/ep_moe/kernels.py,sha256=ijqRzS-tb0LGnDU5hW-g0JH104ppADrWaUIDGxb9Feo,22919
119
+ sglang/srt/layers/moe/ep_moe/layer.py,sha256=1TmWnxv-bW1Qbgru-V-vGnt3ruuTIwHQy0Y5ZA_xzvE,36824
120
+ sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=jnr6KSM8YooftTjZ3gYe0eWpOd1dmkXqk4hKRvLTwCo,19708
121
121
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
122
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=lBF40JXOgUzFMza020gRYol4WdLME9OB-YA9tSfsJ3k,41516
122
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=_ufAA7m6alppU4vVxh7QrgSgFOcBSekLQ-UQdHq7TmQ,53988
123
123
  sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJADUOBJoRHMIWmf-DU4,24100
124
124
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
125
125
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
@@ -157,6 +157,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
157
157
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=tPYxeo_xUOkjQrZMdf9v4IaFrw0RGaZNLGLJPOhjE_g,3260
158
158
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=MW7KzNa7DcKm53u2Jh-mnb93A3ICefgQHkdKONJMfew,3255
159
159
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
160
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json",sha256=JEOXj48phwoumZWBwNq9TpqxVwIrbnfot4QfAdzvLJI,3249
160
161
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XmKFaMheq7NNrsvYCJteul0w809l_l460ZiDQC9ToGs,3262
161
162
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
162
163
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
@@ -167,6 +168,8 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
167
168
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=Y4WpmVGlGERHoeoQNGkQ-GC-MsEtMblqnAVuDbARJdw,3240
168
169
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json",sha256=RUkd9fW9WbajF_fFIzppsE1qyWGR5aRC4Cln-BPdu28,3254
169
170
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json",sha256=Sc9xK1wtRUqIzXppbutcq-Y2e9M0DZl2OGVzzB0aQuI,3265
171
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=z8Iedw3N22cWXTCPhVBBk-yZqkc30ePMzv9KkgPoOd0,3266
172
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0HPFNkhgQ_Yd0190i1bQSgth3q4zCfBgiRQJsITO-S8,3265
170
173
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TBscr1uWwpY0FrKQ5Y3EO_Qg6I97u4f_zjnWRvoeLvE,3260
171
174
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
172
175
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=uv-RwTNZT2n264dLo4eWxUpB3g7QqUyf2MFEGiRvoqQ,3251
@@ -240,24 +243,25 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
240
243
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
241
244
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
242
245
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
243
- sglang/srt/layers/quantization/__init__.py,sha256=ppIxDMFk-XcmDS7SWZe5fc9ymJIu3H3rCHTqm6utKCU,12001
246
+ sglang/srt/layers/quantization/__init__.py,sha256=TxMVIyn2PiP_xrZQugOmzrfZ7DEKEjdNOuFhDNBWBys,12107
244
247
  sglang/srt/layers/quantization/awq.py,sha256=VImnVCU_QBLFba6S88T0dJ-vLy6SMm3OLIMEdllDfVI,6663
245
248
  sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScLFjB5_sywFlAfE5J0,4776
246
249
  sglang/srt/layers/quantization/blockwise_int8.py,sha256=BS4nmo18QaC3vyCe9Wwe0Te-7FmFUe-udl7xbhRaU0s,14887
247
250
  sglang/srt/layers/quantization/fp8.py,sha256=J9P6SwZ1PIb3dEJoP7X2Uw5VOtU35vU5jugkoYyaZC4,41081
248
251
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=JRalHJ-btDpzl3oXu2R_ZoJBu5TzBBmW_wKZDFs-usQ,24384
249
- sglang/srt/layers/quantization/fp8_utils.py,sha256=OL_tV-NvNGcXV2i3mOXytjzi9d4t59vOxvZN_hBc1KA,21572
252
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=CDR2fLrZa_mZ86n5S2dDjYMpVCGa2n7gCXd2BYZjXcM,21391
250
253
  sglang/srt/layers/quantization/gptq.py,sha256=e4rMz374-yQQqeAI77WPxfcAaRk38GeN2akEpvnC_Do,15141
251
254
  sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRn_imIw8kNgqdtb2lr7BettjgDgimbl1Rubnamjh8,11352
252
255
  sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
253
256
  sglang/srt/layers/quantization/kv_cache.py,sha256=rJi6amyLZsquUMo_V5iLlPMqdsGTLgxh4popN1xUHCQ,4236
254
257
  sglang/srt/layers/quantization/modelopt_quant.py,sha256=mne4uKF0R-K0OvWN7X5ZxD4LdXKBc6GvmpZzIW6gkmM,6969
255
- sglang/srt/layers/quantization/utils.py,sha256=BmbovB_SxPcV7DRzGod8plawLbWRLIiN8Q2l_oWtAfw,5627
258
+ sglang/srt/layers/quantization/moe_wna16.py,sha256=JlYCiy825MEMlBeQqqWwPuhoH_FbEcgTYlJUzrWj5IE,19248
259
+ sglang/srt/layers/quantization/utils.py,sha256=QqGFwRnFenOm5HfyLoS4D06_LyvNWgOggAiFtZXTpQ4,5637
256
260
  sglang/srt/layers/quantization/w8a8_fp8.py,sha256=XcQdgqXA3eKbAf-4_0I81Y5Nvjns3bQTocovnN8141w,6234
257
261
  sglang/srt/layers/quantization/w8a8_int8.py,sha256=V5vxn0wmUL1szj38lsJOKeNNEvFHisU7hZZLO4FfoNc,8733
258
262
  sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
259
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=YEtWykakVRjS4rko8T7ui0uf_Q15n9SPcASY9EkWYGk,25344
260
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=jHuFgm6s0xxijtceDySj-myyWVn2JJMx4ZXn-4P16F4,24985
263
+ sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=ngKVSHfQUNSZzrLMu4Iv_4Fzt2eOoOIZKcO2RNDiwAM,25353
264
+ sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=roqRrIJybA9YuN3kqSeoLTJhXfTHOOtJd5MkenpOL8E,25835
261
265
  sglang/srt/layers/quantization/compressed_tensors/utils.py,sha256=mnUmKWFQUnY8bVoFHUuNVwqsfS-cefeR-ofyaihCXcY,7621
262
266
  sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=qcdRgoUNJWXqSimns-D987TW0OTk3uFuWNBX8Z6H8Fk,246
263
267
  sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=tdKJC8c3SX8T3z8JL-1YCsg4ftcv55Wxt0vZrYftpX8,1635
@@ -429,106 +433,107 @@ sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=CDGt7lpu9GjykgMtmwbZ3PEqjTlR
429
433
  sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=HTfU3HxxxVyaG_aJrrVjPJTnqf62yvepcKJKYkG0XJQ,5944
430
434
  sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=ZmWEqHJaorRNNj-c_ZXPi_pX8X_yIAwudRHAJVa0m08,4350
431
435
  sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=Q58UzWUb3QFqY_ZxWA3poN373N0Hwkks5AQRKIuvFC8,4517
432
- sglang/srt/managers/cache_controller.py,sha256=BRRyu110lJalLVLZbTCMOWbD4s-LG3f9Xz6sxU2dCrc,18144
436
+ sglang/srt/managers/cache_controller.py,sha256=d4RGqbut1FlzJnpqr7WY_TYmRjYPS07OoOVbztjs5xI,18959
433
437
  sglang/srt/managers/configure_logging.py,sha256=fOJaXAQ1n9m-8KPJndpsKvS885i69SMafoEADLIVfIM,1633
434
438
  sglang/srt/managers/data_parallel_controller.py,sha256=Xkj2n9uDyq7a-AVDZlfzeuNkC4ibsSftb1_bed9hgQ4,10318
435
439
  sglang/srt/managers/detokenizer_manager.py,sha256=HTfpJWMF1EImhKOnLJ96xPmYXm71xzaisLMfxg3zpgs,10111
436
440
  sglang/srt/managers/expert_distribution.py,sha256=r3o5RGI0gnV7xb60AApqKYa0oiSB37oB7hQBX7P3xZM,3225
437
441
  sglang/srt/managers/io_struct.py,sha256=_WYQ2x49Wc8WqmZH0Q5Te7zVrGRQkbn0ADghuDwyk7k,23852
438
- sglang/srt/managers/mm_utils.py,sha256=KzodrStj3ouIEHLRja3TUml0YUQ59qmEPejks5ikPQk,13828
439
- sglang/srt/managers/multimodal_processor.py,sha256=nTlktTXYo2NKr5Ab2AaSjydNQDdwpJHs4XdYr5zdtkU,2154
440
- sglang/srt/managers/schedule_batch.py,sha256=2HOloc18BAM10aL8os34sQ3KM3sYi-yhDDahlTxrBtU,60106
442
+ sglang/srt/managers/mm_utils.py,sha256=KxZF684q0ohUn4J4dPMdDfGtOKLyWUZ3o7yG-mGcjnY,15464
443
+ sglang/srt/managers/multimodal_processor.py,sha256=37SSZIdhdmcGaZSH2A2GLdntcbIxDUiomX6WR_BpmtQ,2132
444
+ sglang/srt/managers/schedule_batch.py,sha256=oHXIHW8imrDnV0PuOjysG9qvEVYqQY9XGCTMKzDd-6I,61013
441
445
  sglang/srt/managers/schedule_policy.py,sha256=E1qVq2G3jptKdX9nlqfayeRBUll9xB6bK8nBf3EW32E,19469
442
- sglang/srt/managers/scheduler.py,sha256=kH8oC7R_gir2Whsq2NMCbzdmd8nqGx0GXqXuVwr8dTE,80004
446
+ sglang/srt/managers/scheduler.py,sha256=hSYFlzkr20ZCYVvPfffmmG_aQeLUx9xjDCcvudEnU3Y,80024
443
447
  sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=u2sj6MViFTov0lVZSysZ-wph2pEqRCtCjwA1UdttZ7I,26338
444
448
  sglang/srt/managers/session_controller.py,sha256=o-ifit0n4_xHLNmyD0Ams8FxGRgxFybX-Vz1hwgr3UQ,5755
445
- sglang/srt/managers/tokenizer_manager.py,sha256=cYzNwHKr5Y5LMwHeU1YIyTIR88-QMpMYqcWt1nm8KLU,46161
449
+ sglang/srt/managers/tokenizer_manager.py,sha256=dSuYrkMNZUcI1hSNP4sw5fHV4FdndysNeRG1NsxBEjo,46163
446
450
  sglang/srt/managers/tp_worker.py,sha256=IFiOhbNIya-7cqgp_Yg0ZXGcsgy9YS295AfxJYjFqzQ,8833
447
451
  sglang/srt/managers/tp_worker_overlap_thread.py,sha256=3_ZJ8Rq7v2ZDaRNTRu5Dy8AbqiAlJQp3IAKnn_WAwd8,9127
448
- sglang/srt/managers/utils.py,sha256=59IuYvuEfi8ji_acKqH3Y1-2PaA_dSlZMbHSCqjQfCA,1629
449
- sglang/srt/managers/multimodal_processors/base_processor.py,sha256=PpNwQBhEuXbztzS3cBFu7UL5sfKYay-WoqAweSVRRao,9984
450
- sglang/srt/managers/multimodal_processors/clip.py,sha256=cuC7jynTkqRAm_yixey0Tv1KSblI97lCzOuX8-Ix8sg,2148
451
- sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py,sha256=-qU99yCs3TYM8g9vOo5_SIWX91theu0ZREXZT5-A9yY,4594
452
- sglang/srt/managers/multimodal_processors/gemma3.py,sha256=Ic8wTOS5TS9qc-SsGkKWQ_7NgnimCnnBeXw4UB8DTgM,2745
453
- sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=GaVH3_HxGcQHRvFuP5zpkQ9m678cyVrbjb1iwwdV_fE,2780
454
- sglang/srt/managers/multimodal_processors/llava.py,sha256=2UfMVl-CoQ00Wt8XCZLkE3G7mFdU4Ol1aid6PPG-ZXw,5711
455
- sglang/srt/managers/multimodal_processors/minicpm.py,sha256=eD-FgRMTSF9jvolMQYfIgzt31qs0SyLEcVrpHaf46fU,5979
456
- sglang/srt/managers/multimodal_processors/mlama.py,sha256=K-_guz4aaxR8R2zm9MAnLy7K66fkd_0J6R9P_4wdIb4,2118
457
- sglang/srt/managers/multimodal_processors/qwen_vl.py,sha256=OBSKa6LGhTl6wJJ24Bk0IibcKM97Q-6XeBJ1jxLGMbY,6654
452
+ sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
453
+ sglang/srt/managers/multimodal_processors/base_processor.py,sha256=8ELm-cEJgFnOh8DxzsgRlCEGjewA68IygJRZGlF8Azw,8923
454
+ sglang/srt/managers/multimodal_processors/clip.py,sha256=lRc2mcuDbAhZVf-0EfkO81pqDiol9zLvTpDqtPIBQ2k,1525
455
+ sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py,sha256=j7j1D38azudJjYthVpdz7jxQ9Z7SjwQfskpOIshAdiY,3147
456
+ sglang/srt/managers/multimodal_processors/gemma3.py,sha256=UlkyIoc8XOw69iFBYiBYLx--pdfnM4JfCFtwRrd3w-o,2267
457
+ sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=wZs4HZhPov7yvV2VU2ep4k1ANOimVqPRIs3cpC-O1I4,1820
458
+ sglang/srt/managers/multimodal_processors/llava.py,sha256=8mac3vUUpVd12o43k1TyMaLEySZB915ks8Q5epeZmbg,6209
459
+ sglang/srt/managers/multimodal_processors/minicpm.py,sha256=Mq-iH2j90VrGAbSaF3ayYWhTEm9RvWNI6ZhBb6G23dQ,5684
460
+ sglang/srt/managers/multimodal_processors/mlama.py,sha256=MLiGS606LzVtdoXvjWGANx-K_7nE9J_fMVmkXN7Gz8k,1661
461
+ sglang/srt/managers/multimodal_processors/qwen_vl.py,sha256=67EmFiAkvZncU-eqiiS0Q4dr3pWcfI-RofYiQnNWvu0,5722
458
462
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=NY62Zo0A0tLJ7ObRLOQqQcXCxoJUDZsK8f5U4dNQjKc,973
459
463
  sglang/srt/mem_cache/chunk_cache.py,sha256=it5SfL1FwMbrdeOH-I-Eu_i-I9hFB1xL-z_brIUoCkk,1835
460
464
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
461
- sglang/srt/mem_cache/hiradix_cache.py,sha256=BjPZXHYmCW6Sdohb67Qqzf2T0YXkO0hEPyynvn65xHs,15956
462
- sglang/srt/mem_cache/memory_pool.py,sha256=xfEFCYs3DsW7t5Nuv9pwFKtap3SJgxkvXToKQFUT-rI,28255
463
- sglang/srt/mem_cache/paged_allocator.py,sha256=Fl6d8rgkwGIgq3n7AKM7Jmm_aIgu86jJ24636nIkX6s,8594
465
+ sglang/srt/mem_cache/hiradix_cache.py,sha256=t3zxOCg8A4uMdjrtKbSdDJBwFubqnhfGOEdSs_22Zb4,16161
466
+ sglang/srt/mem_cache/memory_pool.py,sha256=v5Mzx1VuyRpZ7P3liiKKfXuw_t24kjYPpqV_ZCwGCeA,31098
467
+ sglang/srt/mem_cache/paged_allocator.py,sha256=BrJS0vN1k-vTSgb_M8u_1KoZFRgzgR1WRyImCTq3T0U,9770
464
468
  sglang/srt/mem_cache/radix_cache.py,sha256=Lm-pco6CJ4orb9IfDpbHm5MnyK8Ya0OF1x9p88dv548,14906
465
- sglang/srt/metrics/collector.py,sha256=0X40ZZ18182sx2t0eqeqoK7gspH36L343zNvSkgBvd0,9293
469
+ sglang/srt/metrics/collector.py,sha256=aCxHqgsQ6P8ZxsAvq_MoEVsr3KUvIUSOBpGYMgBxmOM,8442
466
470
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
467
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=rIxQ4Yvt5-i5oDBxkzEm3OtvWid7yx73jUtMILF6WWQ,23207
468
- sglang/srt/model_executor/forward_batch_info.py,sha256=RKRQMoMcAexG-YweQKl-uSXc4qe-yk4u6iTtWK_BOpw,19591
469
- sglang/srt/model_executor/model_runner.py,sha256=TMcejBnQxNSZGst-8B04z9IX6smVxCjTeA6dy2-5U84,44807
471
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=bDLOqlxdwRUyKitG8JyZygnm05N00q-TdNiAayG_T8o,23223
472
+ sglang/srt/model_executor/forward_batch_info.py,sha256=8VI1VxSmyH26lIHnCNeGqYw2XxslbqN_cuSUIEPUtRU,19468
473
+ sglang/srt/model_executor/model_runner.py,sha256=f2e0Ts7eTy5_xCIp7q2If4Mi6Oi3nw45zyBPvNgR_-0,45006
470
474
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
471
- sglang/srt/model_loader/loader.py,sha256=mK9ZspmhUhw5A0e5G3DF77HJ4-JsDvzLvwj5I2J3Vdg,53883
475
+ sglang/srt/model_loader/loader.py,sha256=AUS4SqSFghbQjs29C65lg7_zxR9h1t7N5G0gERjc0Rc,54238
472
476
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
473
477
  sglang/srt/model_loader/weight_utils.py,sha256=Bkfgz6TUjkJJb8BiFxrv7FgbZFd9eW2y21jDBPdLWSo,32056
474
478
  sglang/srt/models/baichuan.py,sha256=iXgta-W38OWesxmXWZJ73fUvPdu51EwTQzUD5mmfJ8s,15721
475
479
  sglang/srt/models/chatglm.py,sha256=avLC7mjjGskBxCxy-9s0sMlAJjfFoG_y8VieR1QfDsM,13918
476
- sglang/srt/models/clip.py,sha256=vNKS-wWs6Pl8hwJUY-3g8L3uax9Z_Voc2CF0C8IEEiQ,19554
480
+ sglang/srt/models/clip.py,sha256=fCMtAcaKjruSIWfD4YGb4HXh6Tzp2pjpgDmp5JpwBPU,19794
477
481
  sglang/srt/models/commandr.py,sha256=Ug-B0QcdWZufrTybC6K5yP3MLKNsYb-vzfrqUsXYUcI,15276
478
482
  sglang/srt/models/dbrx.py,sha256=0Vf4yhqe8YeQuKR3P-agvYJScmHwH3-tFbyU8kv5QJM,15559
479
483
  sglang/srt/models/deepseek.py,sha256=Le2MXij8m4hT7QYgD0bFMFmYhbgX7SMjoXZFB8BxgyA,16871
480
- sglang/srt/models/deepseek_janus_pro.py,sha256=D5nJgMmrIqXW71r9_-_yd63G8DftnM1K8mS4hU8O_pc,70547
484
+ sglang/srt/models/deepseek_janus_pro.py,sha256=8wAzvcGdyo--3faMN4QtagT1eAZMhMFduvpCXqUS48Q,70456
481
485
  sglang/srt/models/deepseek_nextn.py,sha256=kca-2Fm2_SmqbOEFfd80pobooi1BXd1oe_4EsUM6SeI,13561
482
- sglang/srt/models/deepseek_v2.py,sha256=7ljI2R9GmngO4gcrV3MLmioxeEKgCFyG-YL_J6QYA8Q,58695
483
- sglang/srt/models/deepseek_vl2.py,sha256=T7zdLBnnBSVPCX4cec1B-MbnXru1-70KqEdDLqbYb_g,12956
486
+ sglang/srt/models/deepseek_v2.py,sha256=HJ8cuH87E_EF62YXlmYBjGVJk5P721T5M-XKMqsrbYg,62633
487
+ sglang/srt/models/deepseek_vl2.py,sha256=RVvi_3qsfrkqMCCnjjTA8OwUc5ySutc7asAH-rUJLVo,12922
484
488
  sglang/srt/models/exaone.py,sha256=5iibqQTjpgosuGRt2rj2lWR0ShK2XGhbdFSnOWpaQss,13386
485
489
  sglang/srt/models/gemma.py,sha256=3XxMDOKz4xMP6VzWoW8f0hmMf8LP8fhzMw5prsYC4e8,12602
486
490
  sglang/srt/models/gemma2.py,sha256=MDe_HNkSpEJpw426tbx3fp271GBlSVEuhIdGeOB_jYA,16356
487
491
  sglang/srt/models/gemma2_reward.py,sha256=V8U3_ADUHWPdOwvEe1jhGW-oJmBgL8t1TY3-67Ksv2A,2618
488
492
  sglang/srt/models/gemma3_causal.py,sha256=nKO-DRtvcXn5bHquxILgnp0fJT6usoB81W8kPVdYsA0,24934
489
- sglang/srt/models/gemma3_mm.py,sha256=ZFgHB9-GBNZcMtYvUdynk16mWmKQ-oYxkDHhttWNQB8,17854
493
+ sglang/srt/models/gemma3_mm.py,sha256=tWX2vIdRf5zePwKMLbb0d24DUWoTdjmdXnxIcULQJ2E,15221
490
494
  sglang/srt/models/gpt2.py,sha256=dAnfmsAL7JVHakryqrERR1jgL8mI1Op6nPHYfDCF7Ao,9802
491
495
  sglang/srt/models/gpt_bigcode.py,sha256=EAN6xAXpa8m3DcBuH1D4rTPji2oG9NSozGXSNHtE2lw,10268
492
496
  sglang/srt/models/granite.py,sha256=nu_Zl_PYn188gk1uYVZ76y4wwHZV7G0w7uanhqpSFUs,20813
493
497
  sglang/srt/models/grok.py,sha256=pQOXtpHOYVntwt5QQRLffYsnMHmMfPMmGyKMfR0k0Ic,27994
494
498
  sglang/srt/models/internlm2.py,sha256=4eh9WVgK4yg13IsnH5qB2xUCWnixj_aLLz7qa_4m2_Q,13017
495
499
  sglang/srt/models/internlm2_reward.py,sha256=ndfGmyqYZbVZ7C7rJ-v9oK3wa-EpoBGybS8MlyKZi2E,2522
496
- sglang/srt/models/llama.py,sha256=0jD_xcCuLfDGfHrzwn3oRZvRxilaWb4ARasu5U1x9zQ,24529
500
+ sglang/srt/models/llama.py,sha256=FRdD-oCQl1M4hkYwIrWuECQ1tgxBq9rEIYOr-rlgswE,24616
497
501
  sglang/srt/models/llama_classification.py,sha256=4QWTFaUZIFKYZvEzs8bx8VkOZNIwdYCLrnwrdAw4QK0,3108
498
502
  sglang/srt/models/llama_eagle.py,sha256=OB2lKsjn7BcfCZljklnhk83me8j0PuQmYLou7baNcq4,4866
499
503
  sglang/srt/models/llama_eagle3.py,sha256=v3bftBVDIGjnzngQYnu19cy0J_3w7yruHqLP5nsAQDM,6642
500
504
  sglang/srt/models/llama_embedding.py,sha256=zq-_lNu35VBFc7eemiam0zdkGIE8fzrgk5OWYfirZnA,3254
501
505
  sglang/srt/models/llama_reward.py,sha256=LF2nqMV5XOrljGjAwJg43mBv3z6Q040I2EYlgZeCp8k,4681
502
- sglang/srt/models/llava.py,sha256=G3aaWr0Pldx-yqmbv33KMMwpJ7TAW0yIY-eE9bFfWto,26785
503
- sglang/srt/models/llavavid.py,sha256=w_eaZRSObvqftKE_s0cAE5NvUyFtHfU1EiD0XFBfSow,12492
506
+ sglang/srt/models/llava.py,sha256=KMwNNrlMuMaKEOZMDRBKBQbe6uctpKTLc0zOceyGC34,27242
507
+ sglang/srt/models/llavavid.py,sha256=q0lHlRnoYHKJZsWnkIQdd6dYAQ26t7XsmrqA0zDGmZc,12829
504
508
  sglang/srt/models/minicpm.py,sha256=-ot45U_Bv4x85JdbIAQXoxa1sF-ZDkBk8flU-Ruli5Y,14652
505
509
  sglang/srt/models/minicpm3.py,sha256=sRHPFUH636GIY94B-hpAN2MSzYT1pzLPVypTNjUtttY,26270
506
- sglang/srt/models/minicpmo.py,sha256=b2TE13gXgfcBg9ibYaIHbFFtsMbPahs_qJC01BaMKu0,79521
507
- sglang/srt/models/minicpmv.py,sha256=cMKLdprNl9siOlHJDY9OPRRDBCax84zHWCj16m-kzaA,40658
510
+ sglang/srt/models/minicpmo.py,sha256=kJnp8UwJTV7kXEpuVWA50ecRsuZyFedHlwkprix8tag,75619
511
+ sglang/srt/models/minicpmv.py,sha256=79zZn3co9r7SERatx49EuHRoLWRiy6qeaUFgjDWJo2I,40571
508
512
  sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
509
513
  sglang/srt/models/mixtral.py,sha256=6Fse2J-20IMylP-yzpEihIinaH37TmmslATbLcWBRYY,14926
510
514
  sglang/srt/models/mixtral_quant.py,sha256=MSa6UKPbgv8Rn8Iv8o1dQhcstAHLNQzE0eepFx_hYSw,15221
511
- sglang/srt/models/mllama.py,sha256=1hJiW6AHakH3QvGC2zKxuvZfo6YO5fbAmp5W-i2swB8,37891
515
+ sglang/srt/models/mllama.py,sha256=SsK_cEolaeoXh_HkyXsSF2ueYR3sPv1NvnGH2k6Aqx0,38461
512
516
  sglang/srt/models/olmo.py,sha256=FJk8A3T3TF5QcTV6rMP8np94QtvxpMWlgCsv_5VwpVE,12632
513
517
  sglang/srt/models/olmo2.py,sha256=U0ScFzWazOrb_Q90sfXkpVNAsXT-pgZbNgGh80R40VE,14288
514
518
  sglang/srt/models/olmoe.py,sha256=tx5OKWLOr6_pohe2eBcIodCmcuSjtpteHq_tG_QVYCY,15910
515
519
  sglang/srt/models/phi3_small.py,sha256=6p-5EBbwN3FmhoL0VNrNb6VP7HqIhYgVBfQ98L6Rjjo,15469
516
520
  sglang/srt/models/qwen.py,sha256=edS0UYq6AoHZdYUJtQa5wyFNzZMW0JAMmBulH2uheaw,10719
517
- sglang/srt/models/qwen2.py,sha256=D7VIFWbQ_MxY42K9j7ksgNTP9TNh_FA8u4FUND2xSuk,16038
518
- sglang/srt/models/qwen2_5_vl.py,sha256=iZK_giWA58sb7cK9u_T-F3YgdgwDBq79nl2CszTL7Uo,24506
521
+ sglang/srt/models/qwen2.py,sha256=2C5wJXPsaETMGOojZfQ3v2LmqxtVldxu6upZq7ZTqB0,16142
522
+ sglang/srt/models/qwen2_5_vl.py,sha256=azSqNvzEQT4ykx8f0X8mfh2LTSu2OhO0PEES0K09pro,24329
519
523
  sglang/srt/models/qwen2_classification.py,sha256=dGrMm4ebd30_lBhHOhaV57ig2iOTx3nqB4GEzsrRIM8,2747
520
524
  sglang/srt/models/qwen2_eagle.py,sha256=Iz0HWL2FgSD3FqoFhfYmbIZeEYkPTJ96lYbkncmHJX4,4644
521
525
  sglang/srt/models/qwen2_moe.py,sha256=9cLOPHBpwdID92Ed1CEjMUxMxcWX83lklunHbyK4To0,18206
522
526
  sglang/srt/models/qwen2_rm.py,sha256=-mQXDEv11p-I1HXgYLTtY6ROem6UYorO958WsDrzsgs,2837
523
- sglang/srt/models/qwen2_vl.py,sha256=JlcMUKKi2ynZR3vQN5gAwL13KEowq4Irc3Vj2WGpdnE,21983
527
+ sglang/srt/models/qwen2_vl.py,sha256=HMA6bww6bCYp7hTPUqSOigSCQRbhonKjTS6lxakclAM,22092
524
528
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
525
529
  sglang/srt/models/stablelm.py,sha256=w93fNXpDwQbuKi4tdeo0bsXFZrMZVY4_pgNL0E5RErQ,12242
526
530
  sglang/srt/models/torch_native_llama.py,sha256=5tfFSMAXB3ScToqTALtCXa8Oo-qPCJh-KQCNB6QOlNA,19293
527
531
  sglang/srt/models/xverse.py,sha256=I7ivNsk6NRqPxlMUmdclpzDCvhAnWbv_GOj01MKHJrQ,13996
528
532
  sglang/srt/models/xverse_moe.py,sha256=xLwn5pRwQrvj7zMmwl3o49m7xILb2ACRdWvm9hY8LDc,16743
529
533
  sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
530
- sglang/srt/openai_api/adapter.py,sha256=q0364qVBd-iymVnVBngv4ZMdZorl5XEYN6u8ZZCaDTs,71454
534
+ sglang/srt/openai_api/adapter.py,sha256=DRHA38G0T9EV2npsKZPBBH4RGJocjZtIov3U5d5VDX0,71919
531
535
  sglang/srt/openai_api/protocol.py,sha256=Y8PFFhLbzhpoERM6-WsTkm-ZuGcE-3tfenh9e-AC1vc,13374
536
+ sglang/srt/platforms/interface.py,sha256=hym3iooBB4C8if5hDZezgVN6h4NIOu7sg2ZUBIV6XmM,11246
532
537
  sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
533
538
  sglang/srt/sampling/sampling_batch_info.py,sha256=wrGGU27mWOi_yCBBCOvTQLBdyTjfkPuj7Hsk0zOFyH8,11989
534
539
  sglang/srt/sampling/sampling_params.py,sha256=nXm44Inn91YtrMpAm5mDb6-97owRy-Bh6lZ0BIpw73I,5919
@@ -538,9 +543,9 @@ sglang/srt/sampling/penaltylib/min_new_tokens.py,sha256=rdU_D7RoIcrQPhysNQEzmr4T
538
543
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=XM-Lm6u7gYPtMZrTIc0FR4QxNZxBH5s_Cj82umyCzYk,5721
539
544
  sglang/srt/sampling/penaltylib/presence_penalty.py,sha256=NRh10AJrrQlGJ6S-enGdRefrTrWpyqrSm-aNnyqQNQQ,2119
540
545
  sglang/srt/speculative/build_eagle_tree.py,sha256=SFQ3eHbhfNxOdxgqDP5wSV_ZlIVqLw7VivycNZ963N0,11690
541
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=t2mbGZY23E7WraJJQW_4uaLTfnbPDExRVC7jldtuY6s,9287
542
- sglang/srt/speculative/eagle_utils.py,sha256=_T3eRd59bzCR2YE_Z4W6Pux40KpcueWwN-rrLe-ikFQ,25812
543
- sglang/srt/speculative/eagle_worker.py,sha256=W6hoEW0tpNaoC0T01vEEMJOwaiZjhH4rtbvd2FOKxiY,24229
546
+ sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=Bcsp4g0VvBmsrclkgKq512skfw3hkO2zkHX_91pBaAI,9252
547
+ sglang/srt/speculative/eagle_utils.py,sha256=0kxQ69XNKO52qgKz-afO1aNF5Tbf5g1HHB7GMuUROG8,29074
548
+ sglang/srt/speculative/eagle_worker.py,sha256=TysB0F6tFjblIjqoD2nlKQPNBMszDilsII7-mFWFjmo,26999
544
549
  sglang/srt/speculative/spec_info.py,sha256=rhaKG0TzyF9XZYHEWp1jccwTBohSNsUDvxHFtAoOl18,709
545
550
  sglang/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
546
551
  sglang/test/few_shot_gsm8k.py,sha256=7VLbWl4nCQs1wjtW4q-46jf9jUCycSs5Iw8v7sUSzBw,4284
@@ -557,15 +562,15 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
557
562
  sglang/test/test_activation.py,sha256=GeTIJHxlLQfW3kM-X1FGa8Sa3dSGKHEXl5wEy-hfGis,1489
558
563
  sglang/test/test_block_fp8.py,sha256=IqdQKt23annq_QR1gwVX0vzdMyWTEBLRhmPiLMemKI8,14458
559
564
  sglang/test/test_block_fp8_ep.py,sha256=N1rvqbPErBaFFpeAw8TLYXGNZOoG7cfIBP2p5XbSyMo,10806
560
- sglang/test/test_custom_ops.py,sha256=XBTWh3jEsoe9hZ93p3LAUtjEj5l0qNEaZM5Mto6pbJA,3262
565
+ sglang/test/test_custom_ops.py,sha256=4X3-odkJntwNtBAuKtCbYHu6peIP6LaI_VwLw7kmDx8,5550
561
566
  sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
562
567
  sglang/test/test_layernorm.py,sha256=2GMWqqNDuGvSMSsEBF5eDCzwVSYA9E6hGhRo6s4ecKg,3764
563
568
  sglang/test/test_programs.py,sha256=VZ3vXtUDBnXz0M7gFdDH8hXg9Wa0j_qI8CVqjEgRN_E,18877
564
- sglang/test/test_utils.py,sha256=bjRoX9pp98l9AKr0q9TgK8rVNxqMJ-1MeEZdrhSaMPw,30694
569
+ sglang/test/test_utils.py,sha256=jUkIDxJ7I8hCPk0XF7F_IWJkOtn6O7eXJG5pI0cduwo,30463
565
570
  sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
566
571
  sglang/test/attention/test_flashattn_backend.py,sha256=OxS1KsPs19nwZcDtdURj7_liT1cIfEXb6W4FH9KMaaE,10808
567
- sglang-0.4.4.post3.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
568
- sglang-0.4.4.post3.dist-info/METADATA,sha256=jVtIt2_q-sSQu5V2AzV5Ex2pRWTJHW9PrNe_NuQ6WpM,25024
569
- sglang-0.4.4.post3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
570
- sglang-0.4.4.post3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
571
- sglang-0.4.4.post3.dist-info/RECORD,,
572
+ sglang-0.4.4.post4.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
573
+ sglang-0.4.4.post4.dist-info/METADATA,sha256=gHc8ZNyeSnw_PrtGOQ7SAVJwCqonaOP4k0vADEpMqv4,25067
574
+ sglang-0.4.4.post4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
575
+ sglang-0.4.4.post4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
576
+ sglang-0.4.4.post4.dist-info/RECORD,,