sglang 0.2.11__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. sglang/bench_latency.py +6 -4
  2. sglang/bench_serving.py +46 -22
  3. sglang/lang/compiler.py +2 -2
  4. sglang/lang/ir.py +3 -3
  5. sglang/srt/constrained/base_tool_cache.py +1 -1
  6. sglang/srt/constrained/fsm_cache.py +12 -2
  7. sglang/srt/layers/activation.py +33 -0
  8. sglang/srt/layers/{token_attention.py → decode_attention.py} +9 -5
  9. sglang/srt/layers/extend_attention.py +6 -1
  10. sglang/srt/layers/layernorm.py +65 -0
  11. sglang/srt/layers/logits_processor.py +5 -0
  12. sglang/srt/layers/pooler.py +50 -0
  13. sglang/srt/layers/{context_flashattention_nopad.py → prefill_attention.py} +5 -0
  14. sglang/srt/layers/radix_attention.py +2 -2
  15. sglang/srt/managers/detokenizer_manager.py +31 -9
  16. sglang/srt/managers/io_struct.py +63 -0
  17. sglang/srt/managers/policy_scheduler.py +173 -25
  18. sglang/srt/managers/schedule_batch.py +110 -87
  19. sglang/srt/managers/tokenizer_manager.py +193 -111
  20. sglang/srt/managers/tp_worker.py +289 -352
  21. sglang/srt/mem_cache/{base_cache.py → base_prefix_cache.py} +9 -4
  22. sglang/srt/mem_cache/chunk_cache.py +43 -20
  23. sglang/srt/mem_cache/memory_pool.py +2 -2
  24. sglang/srt/mem_cache/radix_cache.py +74 -40
  25. sglang/srt/model_executor/cuda_graph_runner.py +24 -9
  26. sglang/srt/model_executor/forward_batch_info.py +168 -105
  27. sglang/srt/model_executor/model_runner.py +24 -37
  28. sglang/srt/models/gemma2.py +0 -1
  29. sglang/srt/models/internlm2.py +2 -7
  30. sglang/srt/models/llama2.py +4 -4
  31. sglang/srt/models/llama_embedding.py +88 -0
  32. sglang/srt/models/qwen2_moe.py +0 -11
  33. sglang/srt/openai_api/adapter.py +155 -27
  34. sglang/srt/openai_api/protocol.py +37 -1
  35. sglang/srt/sampling/penaltylib/__init__.py +13 -0
  36. sglang/srt/sampling/penaltylib/orchestrator.py +357 -0
  37. sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +80 -0
  38. sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +105 -0
  39. sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +79 -0
  40. sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +83 -0
  41. sglang/srt/sampling_params.py +31 -4
  42. sglang/srt/server.py +69 -15
  43. sglang/srt/server_args.py +26 -19
  44. sglang/srt/utils.py +31 -13
  45. sglang/test/run_eval.py +10 -1
  46. sglang/test/runners.py +63 -63
  47. sglang/test/simple_eval_humaneval.py +2 -8
  48. sglang/test/simple_eval_mgsm.py +203 -0
  49. sglang/test/srt/sampling/penaltylib/utils.py +337 -0
  50. sglang/test/test_layernorm.py +60 -0
  51. sglang/test/test_programs.py +4 -2
  52. sglang/test/test_utils.py +20 -2
  53. sglang/utils.py +0 -1
  54. sglang/version.py +1 -1
  55. {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/METADATA +23 -14
  56. sglang-0.2.12.dist-info/RECORD +112 -0
  57. sglang/srt/layers/linear.py +0 -884
  58. sglang/srt/layers/quantization/__init__.py +0 -64
  59. sglang/srt/layers/quantization/fp8.py +0 -677
  60. sglang-0.2.11.dist-info/RECORD +0 -102
  61. {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/LICENSE +0 -0
  62. {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/WHEEL +0 -0
  63. {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/top_level.txt +0 -0
@@ -1,102 +0,0 @@
1
- sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
2
- sglang/api.py,sha256=gAY9JhqWXjrYoWnMvR-iiuuY1YSN94We-lc1LH0z3cw,6030
3
- sglang/bench_latency.py,sha256=CXvukEW0IeoH2IwN2vuriC0eHBdJsz3lgT7OwwNo_7A,16146
4
- sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
5
- sglang/check_env.py,sha256=oU8VmjjPK2SviRhr41cF1953soBu-eTT5E0Hf04zMzo,4974
6
- sglang/global_config.py,sha256=9JxaFkBKSgep6BVeEl_kx9tuW9PqdijYELyBGTryl6o,1704
7
- sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
8
- sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
9
- sglang/utils.py,sha256=C50xm06WWKpKB8kSNs9vO4egJ2QTk_OAA6M13S2cB_A,8369
10
- sglang/version.py,sha256=_MLx4ac1juJPWEEiC9kMQISX3x3jFBr507jM2P_hxMg,23
11
- sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
13
- sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
14
- sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
15
- sglang/lang/interpreter.py,sha256=3RIeSGdKlKTq2Ixg_Tyo0fGEDTvBKS2f9FaJYODBHzA,30102
16
- sglang/lang/ir.py,sha256=FGWghAfVW9IcxcrVqHiqpf7vmWzuNYoVTMSbBZkYVRk,16839
17
- sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
18
- sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
20
- sglang/lang/backend/base_backend.py,sha256=Q5HdiDtyBewQeoYH0kDtBRVL8KFiEPNq9dw7XmauHQ8,1985
21
- sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
22
- sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
23
- sglang/lang/backend/runtime_endpoint.py,sha256=AaBc5yczchX7mkwiKDMyjLjBkJsh2Lubrfd9lvCOlDo,9544
24
- sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
25
- sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
26
- sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
27
- sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
28
- sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
29
- sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
30
- sglang/srt/server.py,sha256=hUNnTvH4c1AI2JJzoBUf9TQuTelx-vulcqwkEplw7Gk,16699
31
- sglang/srt/server_args.py,sha256=SmvnebtDTsvPNDyW6lltuJKC7h8eVdYmurY1ieIMySA,16475
32
- sglang/srt/utils.py,sha256=GcRFf3pb5l-Q5TJU4gF-Wp7Ct46l3BO0aMpjlyHXp3I,23766
33
- sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
34
- sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
35
- sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
36
- sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
37
- sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
38
- sglang/srt/layers/extend_attention.py,sha256=V5pm7toSDlzByaV4lGRgXVGWFUPf68chvvahlT2h4mk,14092
39
- sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
40
- sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
41
- sglang/srt/layers/logits_processor.py,sha256=wHKB1FjbfY0a7KGw5dCsEhmO4sc7VMy3gYtSPv4oQYM,11097
42
- sglang/srt/layers/radix_attention.py,sha256=lXwm-qs7hPy_EFV1Zf2pPQ0-drAdrO8V5J4eX0LwLtU,7505
43
- sglang/srt/layers/token_attention.py,sha256=pdBORaWQGvDy_Aitcq0XDHk2Rravol-jZZkrsgkXeng,8849
44
- sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
45
- sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
46
- sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
47
- sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
48
- sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
49
- sglang/srt/managers/io_struct.py,sha256=VK61d6zfnBz5a3IMmwYsa5PNa9jUXPPmED1TdDRQGDs,7345
50
- sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
51
- sglang/srt/managers/schedule_batch.py,sha256=sKQAHRL6VoapGiO7yQV796gW4sVGAgVVBMtmENbKtvg,29641
52
- sglang/srt/managers/tokenizer_manager.py,sha256=wqb6zQbkHYcSNU14Auuh5519CVMmfbKGBQvn_IwDSAo,21408
53
- sglang/srt/managers/tp_worker.py,sha256=3sHlN4hxksF22lkOJ8i3X6WSH4_5POy74BfbIAzIDtM,35216
54
- sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
55
- sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
56
- sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
57
- sglang/srt/mem_cache/memory_pool.py,sha256=oOKtPTgzujo9gHXykSuER7VKqQRuwNKlXyXlaK-3dxo,5280
58
- sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
59
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=EyI8sMMoVlOjdTT2Y3cfwo1-uQ43QCQ1skx5BNgchjE,9433
60
- sglang/srt/model_executor/forward_batch_info.py,sha256=P5bGeLsnFbEqgWLI5X5Eg0XFCG1j2oWZOsIAMZNkZW4,9022
61
- sglang/srt/model_executor/model_runner.py,sha256=yzkJLIM41mhbfgfq87ToskAaA1PS67YzhmoSMbflkZI,17479
62
- sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
63
- sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
64
- sglang/srt/models/chatglm.py,sha256=7bHU2AFoppINDZm0EdxgtAJe7rwr9OPkhOCfq2qNrIA,13862
65
- sglang/srt/models/commandr.py,sha256=5BEtIS2uUQJANkkY-6ZeDqlrpUK5yXVYHiztU3vsTKY,14172
66
- sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
67
- sglang/srt/models/deepseek.py,sha256=E5W4nkH-Ne449rAIwQZgz-FAH2Qqp2r1vNfboyk5wEg,16024
68
- sglang/srt/models/deepseek_v2.py,sha256=NMcckZb48kVUwAmDA2l8wO19T6DNkJOkKAhHa6utBZM,26968
69
- sglang/srt/models/gemma.py,sha256=ilfN_NOcz7hpwEJ2y7NW3fBFmFO7YfjhdFDbfzl2qww,12285
70
- sglang/srt/models/gemma2.py,sha256=D8GZOI1tAbEV9PaBmJSsJRzCmvaK3tGXttIbrMb5yiQ,16426
71
- sglang/srt/models/gpt_bigcode.py,sha256=OKk9UP67as3T5bePlTRGHTCD-1wqaUEk92AowXPm6dg,10204
72
- sglang/srt/models/grok.py,sha256=M9rtdXslqYBle5VyZqFVHiJUXq_q_aHbza63xa03zqI,27861
73
- sglang/srt/models/internlm2.py,sha256=CKWBL0dBvLdaEUeJOUvLUNPb8BLrAZ8_BSf2mfFQhfU,12225
74
- sglang/srt/models/llama2.py,sha256=3ZEWi0PVCDNjTrVNvLs1ESdyTcZhJlZjaH5uyS46JyM,14288
75
- sglang/srt/models/llama_classification.py,sha256=Dvzy3PfETiJtnKFOk8qDDLUoZECf_cpSrNeA60PaDo4,4932
76
- sglang/srt/models/llava.py,sha256=-ysi192vpBDxNaMS8qaLOhC34lXQyRtbG_0niVaceSo,18436
77
- sglang/srt/models/llavavid.py,sha256=MX7YpqYh5J4BoOnV7vVAIfoOlBFQXYpp8Kpe7WK0ejk,13562
78
- sglang/srt/models/minicpm.py,sha256=ea_OyiwVTo6Tg9jNRAwqxETnA6FFeAqlIbiUS-xViEI,13843
79
- sglang/srt/models/mistral.py,sha256=jlrWBVNXbAUziAaIdHAjFcOJnKtn9Bl8rBd65ypJM-I,819
80
- sglang/srt/models/mixtral.py,sha256=raSLbp6AfWg5_u-f-lYeRejE9koAjbHt8iIHXd3nURM,21397
81
- sglang/srt/models/mixtral_quant.py,sha256=xYeeatZ9OfwCTas_KbH9nl6lnUT4YqSY7NAxpgLp5LE,14222
82
- sglang/srt/models/qwen.py,sha256=43ea6gn4wHzAaI3JTDLtl08aEm0vIqgzbVH9M8oeuY0,10006
83
- sglang/srt/models/qwen2.py,sha256=Hyhks2r4KHpKeb9iHZpnvEVc5klmnrPwcLohqg8j1kw,12284
84
- sglang/srt/models/qwen2_moe.py,sha256=PZdhEf0DUuGWsld3TyDWlIqSbrrOdqvCD4lAtCPWXeg,18147
85
- sglang/srt/models/stablelm.py,sha256=yPrdzPEoUD2s_Q3RgOq7BBC7z-UtEaACzabqbDRs2tA,11368
86
- sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
87
- sglang/srt/openai_api/adapter.py,sha256=Eq44_hGwHcglCKOc6WqWDxBsgyRqtuC6VR4HB4GLfUY,38193
88
- sglang/srt/openai_api/protocol.py,sha256=pcRgmDM3Kozh74Aj-qEo8q64BI6hEjrdhYDU4m9srdI,8294
89
- sglang/test/run_eval.py,sha256=kbM6SiosfXj-1uYTFXPWMd7hZDvJZwV-AmdHi_WfP3A,3559
90
- sglang/test/runners.py,sha256=APXXbrqmUGUqnX7T1Aq8X2NJQkIqtv6B42a2ybdlPjA,7459
91
- sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
92
- sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
93
- sglang/test/simple_eval_humaneval.py,sha256=k50DKoAbXiw-ubrFXHet9B-7tboHU2dQJf5G3C-KKq4,5838
94
- sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeHM4,2519
95
- sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
96
- sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
97
- sglang/test/test_utils.py,sha256=ITQcY3WGV4kLGWEkfU-AeuFX8yGLmq9LEK5jHiuW7Sw,13991
98
- sglang-0.2.11.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
99
- sglang-0.2.11.dist-info/METADATA,sha256=gSQA5-Hf9y41ulOKiMeHRu4Nf-c9Nbt6xhmlCGzvhNY,33783
100
- sglang-0.2.11.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
101
- sglang-0.2.11.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
102
- sglang-0.2.11.dist-info/RECORD,,