sglang 0.4.0__py3-none-any.whl → 0.4.0.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. sglang/__init__.py +1 -1
  2. sglang/bench_offline_throughput.py +18 -6
  3. sglang/bench_one_batch.py +13 -0
  4. sglang/bench_serving.py +8 -1
  5. sglang/check_env.py +140 -48
  6. sglang/lang/backend/runtime_endpoint.py +1 -0
  7. sglang/lang/chat_template.py +32 -0
  8. sglang/llama3_eval.py +316 -0
  9. sglang/srt/constrained/outlines_backend.py +5 -0
  10. sglang/srt/constrained/xgrammar_backend.py +9 -6
  11. sglang/srt/layers/attention/__init__.py +5 -2
  12. sglang/srt/layers/attention/double_sparsity_backend.py +22 -8
  13. sglang/srt/layers/attention/flashinfer_backend.py +22 -5
  14. sglang/srt/layers/attention/torch_native_backend.py +22 -8
  15. sglang/srt/layers/attention/triton_backend.py +38 -33
  16. sglang/srt/layers/attention/triton_ops/decode_attention.py +305 -350
  17. sglang/srt/layers/attention/triton_ops/extend_attention.py +3 -0
  18. sglang/srt/layers/ep_moe/__init__.py +0 -0
  19. sglang/srt/layers/ep_moe/kernels.py +349 -0
  20. sglang/srt/layers/ep_moe/layer.py +665 -0
  21. sglang/srt/layers/fused_moe_triton/fused_moe.py +64 -21
  22. sglang/srt/layers/fused_moe_triton/layer.py +1 -1
  23. sglang/srt/layers/logits_processor.py +133 -95
  24. sglang/srt/layers/quantization/__init__.py +2 -47
  25. sglang/srt/layers/quantization/fp8.py +607 -0
  26. sglang/srt/layers/quantization/fp8_utils.py +27 -0
  27. sglang/srt/layers/radix_attention.py +11 -2
  28. sglang/srt/layers/sampler.py +29 -5
  29. sglang/srt/layers/torchao_utils.py +58 -45
  30. sglang/srt/managers/detokenizer_manager.py +37 -17
  31. sglang/srt/managers/io_struct.py +39 -10
  32. sglang/srt/managers/schedule_batch.py +39 -24
  33. sglang/srt/managers/schedule_policy.py +64 -5
  34. sglang/srt/managers/scheduler.py +236 -197
  35. sglang/srt/managers/tokenizer_manager.py +99 -58
  36. sglang/srt/managers/tp_worker_overlap_thread.py +7 -5
  37. sglang/srt/mem_cache/base_prefix_cache.py +2 -2
  38. sglang/srt/mem_cache/chunk_cache.py +2 -2
  39. sglang/srt/mem_cache/memory_pool.py +5 -1
  40. sglang/srt/mem_cache/radix_cache.py +12 -2
  41. sglang/srt/model_executor/cuda_graph_runner.py +39 -11
  42. sglang/srt/model_executor/model_runner.py +24 -9
  43. sglang/srt/model_parallel.py +67 -10
  44. sglang/srt/models/commandr.py +2 -2
  45. sglang/srt/models/deepseek_v2.py +87 -7
  46. sglang/srt/models/gemma2.py +34 -0
  47. sglang/srt/models/gemma2_reward.py +0 -1
  48. sglang/srt/models/granite.py +517 -0
  49. sglang/srt/models/grok.py +72 -13
  50. sglang/srt/models/llama.py +22 -5
  51. sglang/srt/models/llama_classification.py +11 -23
  52. sglang/srt/models/llama_reward.py +0 -2
  53. sglang/srt/models/llava.py +37 -14
  54. sglang/srt/models/mixtral.py +12 -9
  55. sglang/srt/models/phi3_small.py +0 -5
  56. sglang/srt/models/qwen2.py +20 -0
  57. sglang/srt/models/qwen2_moe.py +0 -5
  58. sglang/srt/models/torch_native_llama.py +0 -5
  59. sglang/srt/openai_api/adapter.py +4 -0
  60. sglang/srt/openai_api/protocol.py +9 -4
  61. sglang/srt/sampling/sampling_batch_info.py +9 -8
  62. sglang/srt/server.py +4 -4
  63. sglang/srt/server_args.py +62 -13
  64. sglang/srt/utils.py +57 -10
  65. sglang/test/test_utils.py +3 -2
  66. sglang/utils.py +10 -3
  67. sglang/version.py +1 -1
  68. {sglang-0.4.0.dist-info → sglang-0.4.0.post2.dist-info}/METADATA +15 -9
  69. {sglang-0.4.0.dist-info → sglang-0.4.0.post2.dist-info}/RECORD +72 -65
  70. {sglang-0.4.0.dist-info → sglang-0.4.0.post2.dist-info}/LICENSE +0 -0
  71. {sglang-0.4.0.dist-info → sglang-0.4.0.post2.dist-info}/WHEEL +0 -0
  72. {sglang-0.4.0.dist-info → sglang-0.4.0.post2.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,19 @@
1
- sglang/__init__.py,sha256=3M0oz0ZA8fULhV5LwQ4hxh-MRdHsOJRD1D63C60pdG4,1616
1
+ sglang/__init__.py,sha256=b2oIdWzp5P8SzieeOs2TzJoN3Do3tfJbV8gZS_imVcs,1619
2
2
  sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
3
3
  sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
4
- sglang/bench_offline_throughput.py,sha256=3OrFI26PmoVTU3pQrBFC50AZI7HpKKuk4vYycbkDjhY,12428
5
- sglang/bench_one_batch.py,sha256=vxXSCQRTMeJUtJKsSoP6tLdoWTdFp1mhwsLpKHccs2c,15858
4
+ sglang/bench_offline_throughput.py,sha256=rgMWDhA1Hai0gKBzxc0dzTWfI8l39Cyw2VOCyMt1YyY,12771
5
+ sglang/bench_one_batch.py,sha256=aF0onHeRjy7AYVjsq1IA3rZEhUuYXuslg1fAhuvJ2yo,16120
6
6
  sglang/bench_one_batch_server.py,sha256=-fV9FTLNNcSIy0pgYeggXedPVK0fVsXZqVQswT8OMOY,5945
7
- sglang/bench_serving.py,sha256=Oa_Qi7YApv37jGDAmuIaZSIhayvRpKq9GZGZLXBU-9I,52924
8
- sglang/check_env.py,sha256=q1sdYL-gcKSCeIZMk7sUMh9rjM71f-EUgp07OGPSbZM,5446
7
+ sglang/bench_serving.py,sha256=zv_EcbWno79j7WYFL2m6BfCLT6iSOfGV4uwGbDg9KQA,53141
8
+ sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
9
9
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
10
10
  sglang/launch_server.py,sha256=4y2QeSj0wVNB9MJQZeahD4ahTDU6gwqo7MPUytyFop0,403
11
11
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
12
- sglang/utils.py,sha256=r4Dw-xffcrTRposls-gqyoYxjgJNYhVduK_6bDN_Vj4,11526
13
- sglang/version.py,sha256=42STGor_9nKYXumfeV5tiyD_M8VdcddX7CEexmibPBk,22
12
+ sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
13
+ sglang/utils.py,sha256=23jf4Mz8E5p5a6JOkjnfYZixdjZUk88F_mZ8rZcby5Q,11597
14
+ sglang/version.py,sha256=OUNovuQ1RrdJFYetl0e0U0556H_wiyjhVks9-l-zF94,28
14
15
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
16
+ sglang/lang/chat_template.py,sha256=cnfjjxIIcYRGRxXlJlOGnpFxFuhMHut7DS52LsOMKcA,15826
16
17
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
17
18
  sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
18
19
  sglang/lang/interpreter.py,sha256=SBjejhLhTKzNM0HbjtTg5r17WPJ64WFSk6lcM_SCWKs,30717
@@ -23,16 +24,16 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
23
24
  sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
24
25
  sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
25
26
  sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
26
- sglang/lang/backend/runtime_endpoint.py,sha256=IWbrAKrUkzNOvwV6V9_y6pkTr2SUYEkKBT-3kirgad0,10514
27
+ sglang/lang/backend/runtime_endpoint.py,sha256=dfs-yZ1ekKmnbpZLluQHWPmMeZJKbaaZRRGYRa9eBE8,10541
27
28
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
28
29
  sglang/srt/_custom_ops.py,sha256=Y4gyTDGhWz-W2Igq25Ojm8XFiyvkawW9I-79iwYvxJ0,3574
29
30
  sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21201
30
31
  sglang/srt/hf_transformers_utils.py,sha256=38Ms0H2-VMerOS6jnczcFtZMS6lhw9B5rSWKAfxVUfQ,7945
31
32
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
32
- sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
33
- sglang/srt/server.py,sha256=uApz59VUGkRx_HCy6kaiKJFTmaOAvAM3o2yYjEWi9EM,34594
34
- sglang/srt/server_args.py,sha256=NubTM6ocZY0YDSvPQbvBdURK-7E5kSKHA2Ze09O8_1I,32182
35
- sglang/srt/utils.py,sha256=p-eYNMVQkw6Yw1b7MBSNWhVvykCuNfejqMjl7cLDq7A,40708
33
+ sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
34
+ sglang/srt/server.py,sha256=tEciMH_U6WIZYPUGDDM0c4BQ16cvgVdA4II-ksPZoMo,34621
35
+ sglang/srt/server_args.py,sha256=LgnQ-kBJZ3E7hMMZj9bSK0mn7Bhjk1nJHxLcxl-lGTM,34572
36
+ sglang/srt/utils.py,sha256=WWEcMJHmvlOjiqE9UicT0ZYwa2PUKDZorAk2Y8PPRBI,42039
36
37
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
37
38
  sglang/srt/configs/device_config.py,sha256=dResqHjkg_dq10v6rnVpbXpvABZRB0jylOm-2_JAnx0,428
38
39
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
@@ -41,9 +42,9 @@ sglang/srt/configs/model_config.py,sha256=OjEeigs5tMNKP-RImJk2NHVFXv-fyQfsGREWMO
41
42
  sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
42
43
  sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
43
44
  sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
44
- sglang/srt/constrained/outlines_backend.py,sha256=LmezsMAmTlQgaPqO4Axl4EcSAqKr_ZYZASfAoVxT17A,6670
45
+ sglang/srt/constrained/outlines_backend.py,sha256=CipNHNNXs8xtnJNVNe6FCwZUlSbIXbGmWVlZz3hUpFQ,6820
45
46
  sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
46
- sglang/srt/constrained/xgrammar_backend.py,sha256=4ZCQgcjWEY2Lg4r2V9sAiYJJblkQ_uVbEnvsjqhR1Pc,4548
47
+ sglang/srt/constrained/xgrammar_backend.py,sha256=4It9_GqU4UZFhxIw_7hkzpXaMPUtksk6Xfe0Agsfw7A,4620
47
48
  sglang/srt/distributed/__init__.py,sha256=__tl9Frrf3PFrSyNYcn5i-y2rL-J4-Qn6RJwrsZ4xgc,83
48
49
  sglang/srt/distributed/communication_op.py,sha256=ZoIhboZyefiAwr-1K-wF3rAFSQ4Wt-RxXpsX443Gbt4,1157
49
50
  sglang/srt/distributed/parallel_state.py,sha256=HplRH5S0AWdwSdhoHYX9_UWQZlFjh2Z1LHaz68EXlpE,47555
@@ -62,99 +63,105 @@ sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDv
62
63
  sglang/srt/layers/fused_moe_patch.py,sha256=DMIyrwOON7OSidKZdreL5HzMhP0AD5Ues0xdY-ADOQw,4471
63
64
  sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
64
65
  sglang/srt/layers/linear.py,sha256=dF2HvqiMbhWlCjvkLFRCcgUFGhG-B0keM_CIpjvgTtg,46154
65
- sglang/srt/layers/logits_processor.py,sha256=oZNu9pNNgmswhuw8irlLm0SfpVrD7cFf-GdfPsLZGHE,13227
66
+ sglang/srt/layers/logits_processor.py,sha256=JlOU0x8vBGIuTwHSdjR6Kly9_uzilBMv0NE_rvUx0W4,14747
66
67
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
67
- sglang/srt/layers/radix_attention.py,sha256=C_mK4mfmKlxMRNeKYP9E5R3PRd3eT-OcE_g3mo36dJM,2058
68
+ sglang/srt/layers/radix_attention.py,sha256=E4cmvkcCdCtb6VyLNrCKy1D6VwHQ063oH3JQXPaRy6w,2178
68
69
  sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
69
- sglang/srt/layers/sampler.py,sha256=_enfER8MSxsCYrR6_NgyFxKA_XqKtii_asOZUFUUsd8,4580
70
- sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
70
+ sglang/srt/layers/sampler.py,sha256=k4Op_HMkQfT7t9wgQwBVotfTUXEocrzRyQqEFnff1pc,5511
71
+ sglang/srt/layers/torchao_utils.py,sha256=07Fe2Csdh1JiQKPGGHWkbq0-a6bV7Cq136ygdtVAhgI,3708
71
72
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=slGwLiWjuFLCUdRe-GTlfumyZpqVX9VF6No_UGOT-hA,21624
72
- sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
73
- sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
74
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=q0yPeKUjGbxKnOqbZLHs5fyYkkVE3YEkBBMWtUaiDL4,24611
75
- sglang/srt/layers/attention/torch_native_backend.py,sha256=amR8xGaaCONvo3M8I4nuYMv6OxPHyYoxu3m8vAqjWfo,10295
76
- sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
77
- sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=BE63WhKiutSNkhJLsRwvfsRy-ExvuAv7FZyoWv73ul8,18744
73
+ sglang/srt/layers/attention/__init__.py,sha256=KIJhzOJWYioQE7Va4D83-V-ZUZVMZcczuNgDC3dlSRo,2583
74
+ sglang/srt/layers/attention/double_sparsity_backend.py,sha256=RQdEKRykSLf9ilnaHmR6T7RFqh4emH_adfB3aJN2BUU,10920
75
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=NgeigL1WiPOuOry0Gbxv-6HEcERB8Du0mBJgYcTVIAA,24943
76
+ sglang/srt/layers/attention/torch_native_backend.py,sha256=nQdeqWEMMH_wrod5wssDCJG-uPKm0uslvkALKqPRPQ8,10509
77
+ sglang/srt/layers/attention/triton_backend.py,sha256=-TobyZHwlbJ5HhbFg-jgCqVOw4Y-opgEuFo-EusASQc,6264
78
+ sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=oJ_UK1t229zF3hbTDiQe7t-X-IbM2dOxx4U2ch-vmjA,17847
78
79
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
79
- sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=Gfct-0_l-S2ZrP4F-zkzNiFbmd3C3f7uJovacOuDxaA,11472
80
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=tZJhzqcf1KKMT8z7_32eVk_D1NHP71c-S3UNxemfAHM,11542
80
81
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
82
+ sglang/srt/layers/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
+ sglang/srt/layers/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
84
+ sglang/srt/layers/ep_moe/layer.py,sha256=uMropMhU-MaycoxSLxcfD0jZC_cuL_boRbIu86mbZjY,23034
81
85
  sglang/srt/layers/fused_moe_triton/__init__.py,sha256=PHKFqd2hPOO-g9kSMseg2g76lpg9OGXQDThWU6bt9vs,902
82
- sglang/srt/layers/fused_moe_triton/fused_moe.py,sha256=qwfRBOeY5DT48Q6z71Eh9cjFehvs_K6eLIVWNL044Ug,28363
83
- sglang/srt/layers/fused_moe_triton/layer.py,sha256=URDkTt8xEqnqpO5tb_3L7JlhlO53VWfqDDNSRYEu-LY,21545
84
- sglang/srt/layers/quantization/__init__.py,sha256=0LnBJ2LOtk7HnMzOZm14tMd4Y0eSPSgqUhj9uOn53Es,5961
86
+ sglang/srt/layers/fused_moe_triton/fused_moe.py,sha256=fLGmkY6imJYjEw9-3-jJthkMcFGMBcu9HCNIuxAzMhE,29625
87
+ sglang/srt/layers/fused_moe_triton/layer.py,sha256=eMpbZlP3FAQxbHochis7ybZ-fsNBP0PzKF1PN0Xo7so,21517
88
+ sglang/srt/layers/quantization/__init__.py,sha256=FgNy_zNWMWnq3lEGyCSyfLSQtcZtWlq99JilkmEDW7I,4594
85
89
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
90
+ sglang/srt/layers/quantization/fp8.py,sha256=3oIUPaD0PBXQyTKr44I0YJ8XXDdwyoS_-ZA97XdSxXE,24143
91
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=eJDLLDu8ZbrbE3BfFIf89JlIMPOP-14DesbeVsajW0Q,1035
86
92
  sglang/srt/lora/lora.py,sha256=-o2mBmUvoVpdkgdAkWTARN4kfyep3UNEJLcg6moh0SU,15056
87
93
  sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
88
94
  sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
89
95
  sglang/srt/managers/data_parallel_controller.py,sha256=psI4FAuBGjtdnEuwagnGdtRqvqSSxOROfNKQqVDqlVA,8382
90
- sglang/srt/managers/detokenizer_manager.py,sha256=TtrtE37XT5XcJzk8-R5rHZ16NHTPd5XZi8hf3h-sB2A,7462
96
+ sglang/srt/managers/detokenizer_manager.py,sha256=nZkbwt4yty_oy8rvg4T7PbgyVLoBLohvHl25xlQpBoo,8439
91
97
  sglang/srt/managers/image_processor.py,sha256=Y8RgyrzbJjJTpjbnZDa5qiiG5wWjZ68rOXUPDi6kkFo,13698
92
- sglang/srt/managers/io_struct.py,sha256=d_kctmHcNBzzaP5lUEIpdrVVsob4dNOetMHkobUJZz4,14439
93
- sglang/srt/managers/schedule_batch.py,sha256=p6OmeCH3arGZ99Ch7JrnTgGU-SsPgjQY7Z6arVzeqao,44998
94
- sglang/srt/managers/schedule_policy.py,sha256=7QuIsJDRzkrvs3IJk10oOfL4Me0UZwDYvRniT1fSFuo,12620
95
- sglang/srt/managers/scheduler.py,sha256=fmvwVcZS5DsDdxIJ4bjlLV-qJsRbTxjsqPP5a2SS0C8,59372
98
+ sglang/srt/managers/io_struct.py,sha256=_LWWqT3LNwZGaWhg2d3kTg1V2MTHKzRasCvxF9Nfpi4,15429
99
+ sglang/srt/managers/schedule_batch.py,sha256=SAd7sxhoC3Bp8_xd-TEcXEFZBlGZPbn8-wMvBcjU55Q,45607
100
+ sglang/srt/managers/schedule_policy.py,sha256=cLNi__smbg02keWgUMfB_nEM3vllocPB0XyG1P5qO7I,15469
101
+ sglang/srt/managers/scheduler.py,sha256=QlcVMtrLlNcBOkVISdO556jrK8a4LE4ULskC0oCH2IQ,61776
96
102
  sglang/srt/managers/session_controller.py,sha256=Yp-IV3rXczACZxZXmF-QxW9CWICGy8KHQ9ttBGJ8WXA,2800
97
- sglang/srt/managers/tokenizer_manager.py,sha256=XPaSXB6b23u95viFqlqd-tdyrNMMOOiSDWviz_g7UBM,29890
103
+ sglang/srt/managers/tokenizer_manager.py,sha256=gnCCdB5XDobOoBKptwv-o0yYqDkMUxL78s0zBno5lM4,31219
98
104
  sglang/srt/managers/tp_worker.py,sha256=X1EwFX3FSsmXx7jeeX2tjZRocaujabQYWm-M-0CFEBE,7363
99
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=ZBLCAYz-Ls1coObyO8dNtNsO6gG2rn40KulEmALB_J8,8686
100
- sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
101
- sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
105
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=-QNBJRKxraa9Xt2WI1AFzZYdneIJ1eXv0GjFzDqXoE0,8926
106
+ sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
107
+ sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
102
108
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
103
- sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47V0WqNU,10934
104
- sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
109
+ sglang/srt/mem_cache/memory_pool.py,sha256=l9_srwXEfIIDF46nxykbHIOo1VSvU5_Ew3H0r5EC7Fo,11072
110
+ sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
105
111
  sglang/srt/metrics/collector.py,sha256=ZWoFx_FKN0sNMSZ8RJWUVQ0RFEYhIHxdw0d4TZTluMU,6861
106
112
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
107
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=ffypnhzwAdiBAngHEpK9HVmpvX0H3dFIKGa45tS0IGI,15197
113
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=kZ3nV03MD8EQYQB38u4_88_wyW4unECxAdMVICpPyuk,16241
108
114
  sglang/srt/model_executor/forward_batch_info.py,sha256=L5mVoW5SaO6To-7nGk0TZM-FFB5_78cARpJ-aC2rwD0,12883
109
- sglang/srt/model_executor/model_runner.py,sha256=wU06s5tpHgtxitmAWGQC1NjLIcp68iAdbFDWN-TtkBU,29260
115
+ sglang/srt/model_executor/model_runner.py,sha256=MLYBcYIQihu2I3PBTUghiU2mSWsDMzlKzcnX7yHa9JU,29837
110
116
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
111
117
  sglang/srt/model_loader/loader.py,sha256=VBrY4W9CiVvS_D8yXhdkW9jReV9rSMSkJplabz0Fxgk,43528
112
118
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
113
119
  sglang/srt/model_loader/weight_utils.py,sha256=kQo9KPThjH3HAOCfC_tdwdrshdWuWJOVpPR0skSyaRY,24193
114
120
  sglang/srt/models/baichuan.py,sha256=PzBOFcEAixakPEkQSaJwC0Xc1fu-yCsN9T0I67r8QmY,14919
115
121
  sglang/srt/models/chatglm.py,sha256=DOrEhmb0s-yPId88R6nJeLOTUEtogk-vkB69qT2JdWc,12913
116
- sglang/srt/models/commandr.py,sha256=-cswAK2_ZTcOsvBkv5Z8SZ1x60cxJryslbSPHDOcnbM,14169
122
+ sglang/srt/models/commandr.py,sha256=PNXgfOZF84h-rSH0edEECUmEGW8YLb44V75Z_oDhFiA,14223
117
123
  sglang/srt/models/dbrx.py,sha256=2Wqcf3sv57l4gi2xH8yrb5WSmY-4_kbbf6fhpJ4aKWw,14581
118
124
  sglang/srt/models/deepseek.py,sha256=BVNICGoLjQoHmR5lc31YrZ6YbxSRTBilHqlLsALr2u8,15693
119
- sglang/srt/models/deepseek_v2.py,sha256=2AWdDb5qZ6dj_M6ZY-rCG0omTmerUxbhwuvI9Um4Bg4,31967
125
+ sglang/srt/models/deepseek_v2.py,sha256=YKSrqagVcSUwCAi-rwIph-Xu12GrNETMNKxgnffWod8,35349
120
126
  sglang/srt/models/exaone.py,sha256=dkERTZVxrRroqu5AGLP7D4N6n8HvDqlNaDQUIe15mZY,13038
121
127
  sglang/srt/models/gemma.py,sha256=ydRqsG-7004r1fAiz01LHUmcj_6XN0Tn4xO1keJnMQk,12126
122
- sglang/srt/models/gemma2.py,sha256=vPrAasJajitQHB9ZqMFut58xNsOm3fk2m05a-feQL10,14600
123
- sglang/srt/models/gemma2_reward.py,sha256=hJw0hXNPyQSpazkVJVYiW04OtTZH0GiLI-JJef_kaGs,2529
128
+ sglang/srt/models/gemma2.py,sha256=41PlW8pMb4rMETdAni_JWDhZeIn_QsTQireAyUjsURA,15848
129
+ sglang/srt/models/gemma2_reward.py,sha256=nJ01KfqLSJtqMLm3sG8p2mGZFK1xhhjh7I7Ccb-_Hq8,2494
124
130
  sglang/srt/models/gpt2.py,sha256=2je1kE09sGcaORWnJuGYAkcwwOrT9EK-KhQaoCKjCSA,9517
125
131
  sglang/srt/models/gpt_bigcode.py,sha256=tovyOdJu2x3LkzmkdFXX_iJdkxuyChIDxwgvPBy6UPo,9528
126
- sglang/srt/models/grok.py,sha256=lJQEk2D8zyAXE0g4vXLCEmiWM938K_qly02EScwwV_k,13942
132
+ sglang/srt/models/granite.py,sha256=AeQY9Dxd1ZnwgCYBK0vSXXiMGM-yt9iaOVf_ruOUHXw,20409
133
+ sglang/srt/models/grok.py,sha256=UWvVEYfEoH0jGNFSbXpO66OGW5pzmIHlNKcn9gRZEoQ,15664
127
134
  sglang/srt/models/internlm2.py,sha256=_xcKtd6YtEFUTozaN-yUb0xbSYckRpomfPSKcAk4j-Y,12127
128
135
  sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
129
- sglang/srt/models/llama.py,sha256=JVwVDU-8L8RSNOYH0CK8FI557xqjPU2Ts_pQhVfGZv4,19550
130
- sglang/srt/models/llama_classification.py,sha256=EdXmiMyfJ9NH5P-Wel7SRhf_v8ddFFhVJMQgzDt0oVk,3377
136
+ sglang/srt/models/llama.py,sha256=S7nS05hhFGghXu0v-w9RZyBTY6OCEVF5Aaw4GX_E_9g,19929
137
+ sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJs994cIpAPa2g,2984
131
138
  sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
132
- sglang/srt/models/llama_reward.py,sha256=JVaiTK4gVXNMimeq3kKkv7dt5Hc77hPqF4ewvmzjJes,4622
133
- sglang/srt/models/llava.py,sha256=l9mqS9wl_l6ARC-K1UUe7XsB5k9sZratMNQEwx5IjR0,25229
139
+ sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
140
+ sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26373
134
141
  sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
135
142
  sglang/srt/models/minicpm.py,sha256=ws4AqhOfAvYHGd04QuXCZel-Oxy9_vN4p4rTjs9RSz0,13723
136
143
  sglang/srt/models/minicpm3.py,sha256=YIKJDTpwjmpLlv1sNT93k2yZMvGQlI_H87czjf6QYyo,24707
137
144
  sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
138
- sglang/srt/models/mixtral.py,sha256=KvvtLWn-GLtwbdrt4PBq9gIFJiJH-JFcj0BLfcLcXjo,14322
145
+ sglang/srt/models/mixtral.py,sha256=vi6ssY75kNLy_kJrDru6gJYiAogHjSniaO6aMFd1w4E,14515
139
146
  sglang/srt/models/mixtral_quant.py,sha256=uuVO1nWUZJiDhbqZN6gzSMwyfpyZorMuFXHeMCGo7N0,14022
140
147
  sglang/srt/models/mllama.py,sha256=3kX-UqeTSYZL5kPNdkfKEAEv3DpSAW1ArAAoeiXVzIc,37739
141
148
  sglang/srt/models/olmo.py,sha256=OCDMtX1OI83r80mzU4FMC3Tg8cleQ-7C8Tpoe8zgzss,11708
142
149
  sglang/srt/models/olmo2.py,sha256=aC7svioN7XT5owRxPrvhvWBNMON9QXGQBWJ1KHMyXeA,13442
143
150
  sglang/srt/models/olmoe.py,sha256=Rw-3YrHWd90MZQFnmcfUQ-3wAaI0PCFKb0DIrCDND3s,15347
144
- sglang/srt/models/phi3_small.py,sha256=rQFv-4aUGkNPTQjTEME5bcOL9DEQqqFb3aDKrj2joOA,15083
151
+ sglang/srt/models/phi3_small.py,sha256=44_my3QmgJ2N7SOkGZzEb62DXBeCVHojfmCWgkk2uCI,14802
145
152
  sglang/srt/models/qwen.py,sha256=_FKDbwaS5C07uJyyivZpBrXJVej4Ph9ivzJdzWJPxJ4,9904
146
- sglang/srt/models/qwen2.py,sha256=Kh6mW0H2jQdrPS9dJnJShLpo0BNEq6oI4oy5VMHGzac,12444
147
- sglang/srt/models/qwen2_moe.py,sha256=5-ZTYW5bERhTG5LnNXMqYfWB17NBkvxmQJu6DuDAFCo,16819
153
+ sglang/srt/models/qwen2.py,sha256=be4xgcuqNa9kBdaL7x3PjsnUky6fh5K33c_khAWSi04,12959
154
+ sglang/srt/models/qwen2_moe.py,sha256=rYUk_vZW3ftKIIlqPvJZ1K-6oZ_PfGspixh1zm2Y8C8,16538
148
155
  sglang/srt/models/qwen2_vl.py,sha256=3EaUlTbyWOTRXA7eViK1WqmVbCFhXLIpnos49zzf-yM,26561
149
156
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
150
157
  sglang/srt/models/stablelm.py,sha256=iBlIkM7CQmqI25nsujWk0LLCQD7TshzUU8qzZYYrt20,11311
151
- sglang/srt/models/torch_native_llama.py,sha256=dHuUln7RcsRcdyOIUw7HKtOOG9OPmnfOpK8j5gYXzJY,19039
158
+ sglang/srt/models/torch_native_llama.py,sha256=YeXHorFm6QfnczLXwPb5TG9a-He0uiA9RzpR1YZKGg4,18758
152
159
  sglang/srt/models/xverse.py,sha256=Oq--KqvbYu2H4TMVGEHpSnJLEwXBpxlncR9ilsQeckc,13579
153
160
  sglang/srt/models/xverse_moe.py,sha256=AawKEQw--oAl-yzwCjoaZRG7q3rdkyDiam3FS0zjf_c,15537
154
161
  sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
155
- sglang/srt/openai_api/adapter.py,sha256=gZEaG1dVSFv9WLj0369Ke1yrNNgi_gpgKxPt5Ju9mUw,53775
156
- sglang/srt/openai_api/protocol.py,sha256=4T9hGCrpfCUSBjKZFvemTfj49CkTUzpCcx6izLv3ir0,10246
157
- sglang/srt/sampling/sampling_batch_info.py,sha256=YC-KPyDWyLGNPL4YVcst4xwP8Wlz2zcCNJHB_5zljXQ,8470
162
+ sglang/srt/openai_api/adapter.py,sha256=dvKq4O3Rhd77ad6iCtPNykgnk9PVJE-E8wHVsBAfCQQ,53927
163
+ sglang/srt/openai_api/protocol.py,sha256=ecRNNqkhwwKZaIoJlPhtp2VTcHxBJDbNN8lrKS7uBx8,10406
164
+ sglang/srt/sampling/sampling_batch_info.py,sha256=s--zNjk-LErZ5lMqnZ7KiuJltaziKRbQAU5qYpKIxAc,8564
158
165
  sglang/srt/sampling/sampling_params.py,sha256=n7RbBg_bS5fYhsiWa8uJYnfoXy_i5DvtTBOkuFnHDNU,5286
159
166
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
160
167
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
@@ -175,10 +182,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
175
182
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
176
183
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
177
184
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
178
- sglang/test/test_utils.py,sha256=0lY3ZNfS3JCB4LqSRJgBfB8I0MA8TUT-BJmnrvQC8vw,23797
185
+ sglang/test/test_utils.py,sha256=HJG7kUQOk6n9FBbH89PDtQ41C3kt1cfJODhAEcFT0AQ,23823
179
186
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
180
- sglang-0.4.0.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
181
- sglang-0.4.0.dist-info/METADATA,sha256=dXh43Qx9ImYKIF-eB8vuiz_0DDbmNgUrOjCukOXCBTA,22165
182
- sglang-0.4.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
183
- sglang-0.4.0.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
184
- sglang-0.4.0.dist-info/RECORD,,
187
+ sglang-0.4.0.post2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
188
+ sglang-0.4.0.post2.dist-info/METADATA,sha256=maHXecD3U1DdhzfU2aBMhN96MQRqCBPsIA1KlO7t7dg,22512
189
+ sglang-0.4.0.post2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
190
+ sglang-0.4.0.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
191
+ sglang-0.4.0.post2.dist-info/RECORD,,