sglang 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +33 -26
- sglang/api.py +9 -1
- sglang/bench_latency.py +2 -2
- sglang/bench_serving.py +10 -1
- sglang/check_env.py +1 -1
- sglang/lang/backend/litellm.py +1 -1
- sglang/lang/backend/openai.py +1 -1
- sglang/lang/interpreter.py +20 -5
- sglang/lang/ir.py +1 -1
- sglang/srt/constrained/__init__.py +15 -0
- sglang/srt/constrained/base_cache.py +15 -0
- sglang/srt/constrained/fsm_cache.py +15 -0
- sglang/srt/constrained/jump_forward.py +15 -0
- sglang/srt/conversation.py +26 -0
- sglang/srt/hf_transformers_utils.py +15 -0
- sglang/srt/layers/context_flashattention_nopad.py +15 -0
- sglang/srt/layers/extend_attention.py +15 -0
- sglang/srt/layers/fused_moe.py +15 -0
- sglang/srt/layers/linear.py +15 -0
- sglang/srt/layers/logits_processor.py +41 -13
- sglang/srt/layers/quantization/__init__.py +15 -0
- sglang/srt/layers/quantization/fp8.py +15 -0
- sglang/srt/layers/radix_attention.py +17 -2
- sglang/srt/layers/token_attention.py +16 -1
- sglang/srt/managers/{controller/manager_multi.py → controller_multi.py} +17 -2
- sglang/srt/managers/{controller/manager_single.py → controller_single.py} +17 -2
- sglang/srt/managers/detokenizer_manager.py +16 -1
- sglang/srt/managers/io_struct.py +36 -3
- sglang/srt/managers/{controller/schedule_heuristic.py → policy_scheduler.py} +37 -22
- sglang/srt/managers/{controller/infer_batch.py → schedule_batch.py} +31 -12
- sglang/srt/managers/tokenizer_manager.py +39 -16
- sglang/srt/managers/{controller/tp_worker.py → tp_worker.py} +130 -40
- sglang/srt/mem_cache/flush_cache.py +33 -0
- sglang/srt/{memory_pool.py → mem_cache/memory_pool.py} +16 -1
- sglang/srt/{managers/controller → mem_cache}/radix_cache.py +15 -0
- sglang/srt/mm_utils.py +15 -0
- sglang/srt/model_config.py +15 -0
- sglang/srt/{managers/controller → model_executor}/cuda_graph_runner.py +16 -1
- sglang/srt/{managers/controller → model_executor}/model_runner.py +32 -12
- sglang/srt/model_loader/model_loader.py +15 -0
- sglang/srt/model_loader/utils.py +16 -1
- sglang/srt/models/chatglm.py +16 -1
- sglang/srt/models/commandr.py +16 -1
- sglang/srt/models/dbrx.py +16 -1
- sglang/srt/models/deepseek.py +16 -1
- sglang/srt/models/deepseek_v2.py +16 -1
- sglang/srt/models/gemma.py +16 -1
- sglang/srt/models/gemma2.py +16 -1
- sglang/srt/models/gpt_bigcode.py +16 -1
- sglang/srt/models/grok.py +16 -1
- sglang/srt/models/internlm2.py +16 -1
- sglang/srt/models/llama2.py +16 -1
- sglang/srt/models/llama_classification.py +16 -1
- sglang/srt/models/llava.py +17 -2
- sglang/srt/models/llavavid.py +17 -2
- sglang/srt/models/minicpm.py +16 -1
- sglang/srt/models/mistral.py +15 -0
- sglang/srt/models/mixtral.py +16 -1
- sglang/srt/models/mixtral_quant.py +16 -1
- sglang/srt/models/qwen.py +16 -1
- sglang/srt/models/qwen2.py +16 -1
- sglang/srt/models/qwen2_moe.py +16 -1
- sglang/srt/models/stablelm.py +16 -1
- sglang/srt/models/yivl.py +15 -0
- sglang/srt/openai_api/adapter.py +520 -135
- sglang/srt/openai_api/protocol.py +64 -0
- sglang/srt/sampling_params.py +15 -0
- sglang/srt/server.py +89 -23
- sglang/srt/server_args.py +49 -11
- sglang/srt/utils.py +15 -0
- sglang/utils.py +22 -0
- sglang/version.py +1 -1
- {sglang-0.2.6.dist-info → sglang-0.2.7.dist-info}/METADATA +32 -6
- sglang-0.2.7.dist-info/RECORD +93 -0
- {sglang-0.2.6.dist-info → sglang-0.2.7.dist-info}/WHEEL +1 -1
- sglang/srt/flush_cache.py +0 -18
- sglang-0.2.6.dist-info/RECORD +0 -93
- {sglang-0.2.6.dist-info → sglang-0.2.7.dist-info}/LICENSE +0 -0
- {sglang-0.2.6.dist-info → sglang-0.2.7.dist-info}/top_level.txt +0 -0
sglang-0.2.6.dist-info/RECORD
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
sglang/__init__.py,sha256=UV7VlXhXrwi00Zg45iNB9KcnmrwLjdMtjMz06AiafY0,1151
|
2
|
-
sglang/api.py,sha256=1JARbc1wNYF6tODdUpgmNgTyLOvMnxdTBctLvEwzGTY,5565
|
3
|
-
sglang/bench_latency.py,sha256=UPy6WhrddMTDX7HqIeHNhCn5vF0YMOKxJlQRvhMC8zU,10552
|
4
|
-
sglang/bench_serving.py,sha256=UWhTENnoATPJo3nk59Ktr73CwZgiY_MGaRY6TQk0ozI,34584
|
5
|
-
sglang/check_env.py,sha256=CscuPMlf68dkgZf0m-FiLpUisNNDoihMck4qhLOeV1Q,4124
|
6
|
-
sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
|
7
|
-
sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
|
8
|
-
sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
|
9
|
-
sglang/utils.py,sha256=arJuwOAEX445M2NL9SAOi6jBNu0-cfU04PLAr-hIH3U,8168
|
10
|
-
sglang/version.py,sha256=Oz5HbwHMyE87nmwV80AZzpkJPf-wBg7eDuJr_BXZkhU,22
|
11
|
-
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
-
sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
|
13
|
-
sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
|
14
|
-
sglang/lang/interpreter.py,sha256=fbPrKF_SDpVPsiV2WbmlMfwRA7C9T9_IyVmGnpaXa0A,29687
|
15
|
-
sglang/lang/ir.py,sha256=5VVK2JnbspdysrhcGgkmp_JlAprd2XqqRnS_GfP_XWc,16645
|
16
|
-
sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
|
17
|
-
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
19
|
-
sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
|
20
|
-
sglang/lang/backend/litellm.py,sha256=QsaLRh0KVyuaxRZGAvLOdCCSStIMs-V0XyMX0PR6y0w,2452
|
21
|
-
sglang/lang/backend/openai.py,sha256=-ScfI2TFALB_FTYBur9ab0gNYxK1ogHkhdLxX19t6-Y,14808
|
22
|
-
sglang/lang/backend/runtime_endpoint.py,sha256=6iW1S62KmYyQGiWsHJFhZidK01vlIE55IsYN2tP38WQ,9202
|
23
|
-
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
24
|
-
sglang/srt/conversation.py,sha256=Il7JJuu4o42k2xdBWVfONNmstTsAM-4idX6AcEOnrXQ,15526
|
25
|
-
sglang/srt/flush_cache.py,sha256=SJsbZnmDhH-gb9ch3hIwnI_nuwaOLlKvlXADyLBGENk,403
|
26
|
-
sglang/srt/hf_transformers_utils.py,sha256=RnyxC1_OmOf-QzdPBziqAUOIQXyRzrb4RNlqFB1ArEc,11354
|
27
|
-
sglang/srt/memory_pool.py,sha256=FhJk5GtYortO3MJIsMMQ-o49agwDHVX1aEQH2LITq6c,3949
|
28
|
-
sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
|
29
|
-
sglang/srt/model_config.py,sha256=9VF7ET0CGKEY-zdiU7kGv8Cg7H_9Q1fmqtI3C0z22S0,5458
|
30
|
-
sglang/srt/sampling_params.py,sha256=WjJ_sOhbJVMKIBH8gJWQKhzeK5Ipu9XRNV7soWnLtak,3122
|
31
|
-
sglang/srt/server.py,sha256=IKSTgp6FJN6TE9anog47zh9GJYXoyMjEKBNXUZ89Cuk,14197
|
32
|
-
sglang/srt/server_args.py,sha256=RfWoipSUURmv5NqT4L_YF9qJ6gOkZ8omRUFC_5fmgts,14043
|
33
|
-
sglang/srt/utils.py,sha256=HvKkGbut8sOxMpGIzYsJ9NEZJg48LOnxyGESaGZmANs,22385
|
34
|
-
sglang/srt/constrained/__init__.py,sha256=5LB3_mDTMW6wcRkFA5J2Rd5HPHHEKRyiELhe4gtlBYM,1472
|
35
|
-
sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
|
36
|
-
sglang/srt/constrained/fsm_cache.py,sha256=HlzFs9TXvMFmeZhTpXmJU3UNQ_Kix4Ir-SwpqXGhX8k,2061
|
37
|
-
sglang/srt/constrained/jump_forward.py,sha256=s60jZ7Ue8zaodgQm7gDpN6pSedpvpUck_waJALUMj60,5615
|
38
|
-
sglang/srt/layers/context_flashattention_nopad.py,sha256=7ps_9W_ia9zikL9HqsSUwWHyBVotywosE-dOiPtaGY8,4615
|
39
|
-
sglang/srt/layers/extend_attention.py,sha256=aYAAL9HZJpaSASp-ulMvbmSmyMcqdYUsgVQC-Lbm7_U,12008
|
40
|
-
sglang/srt/layers/fused_moe.py,sha256=uyrbCaIHioq3G00xQUrCo53hYDoHzk5rep3Eji3oQiQ,20258
|
41
|
-
sglang/srt/layers/linear.py,sha256=qLwFkOiRAljzE7LkAkLRdcCdVMk-t7b56jEjwQAuYDM,33953
|
42
|
-
sglang/srt/layers/logits_processor.py,sha256=VjP6T582K64X0mfyPUkhcIEZxsqJNu6ziqR3V82N_jE,10118
|
43
|
-
sglang/srt/layers/radix_attention.py,sha256=to6w0kIq6dtaOYJtqIZcqR3t1yf05qBH1LWnFlE-jEQ,6374
|
44
|
-
sglang/srt/layers/token_attention.py,sha256=uBtk3I6KeFjBRKRuQoG5BEZtVJsX4p7UOtJoej6ILZI,7411
|
45
|
-
sglang/srt/layers/quantization/__init__.py,sha256=PQFzdPpul98DvywBA6YMBOnrMjtHE1LMlMpJ7FM8J3I,1971
|
46
|
-
sglang/srt/layers/quantization/fp8.py,sha256=jaqgRFnHC--IL8iqB6Qygi-KXYPYBKKqt_j4Rk55_h4,24946
|
47
|
-
sglang/srt/managers/detokenizer_manager.py,sha256=8rN2cdMr61LWy07lingEqLnNy0W5Rebdn14IsTQ9PCs,5049
|
48
|
-
sglang/srt/managers/io_struct.py,sha256=WmBGrWR8R6X2zh2p1FkfPZtJzuGSlNW8cmIDm0EEqMA,5528
|
49
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=2it1o4dKd7nFzfZflOw1cT03gFktqC2sVPICbBSR4c0,19594
|
50
|
-
sglang/srt/managers/controller/cuda_graph_runner.py,sha256=KEqX4Tc1yEWW52LzzFb4THb-guYIaft2pxxH8rWchSA,8808
|
51
|
-
sglang/srt/managers/controller/infer_batch.py,sha256=3DixMdSW0odH5I6p7h8_xtRlHx4q76ArR6YZW8Gkqzg,35888
|
52
|
-
sglang/srt/managers/controller/manager_multi.py,sha256=DT8Y9RF5OyTxlrLEZYz4claNWir3UrVztdOZaVPiA6g,6077
|
53
|
-
sglang/srt/managers/controller/manager_single.py,sha256=2xO_iWK6tWvc0B31nKbe2N3klxwQBJmPTnFhNjzhVSI,4566
|
54
|
-
sglang/srt/managers/controller/model_runner.py,sha256=9o4xWnfI9-FJU6-S7WfEFlGMjWA2YesAhUKpuq8urhk,14854
|
55
|
-
sglang/srt/managers/controller/radix_cache.py,sha256=tx8LEQpqLxipw9UUVj4D1YQLMMDmWnjDYv8oDlOl-co,8210
|
56
|
-
sglang/srt/managers/controller/schedule_heuristic.py,sha256=SQAGzPS3aB_TPj7rnPBhewwyR6W1sVwW4D3zG3JUY00,2714
|
57
|
-
sglang/srt/managers/controller/tp_worker.py,sha256=VYhO3xcJrcDQwonGLWSWKHq4T7BvFmb6-L5LxY3-fhE,30607
|
58
|
-
sglang/srt/model_loader/model_loader.py,sha256=VS8VQL5ITN3akZ9eU_-uHWMan1axLMNG2_O12HzGysA,10132
|
59
|
-
sglang/srt/model_loader/utils.py,sha256=I2PS5HIH5Cg-p7xKO_Cw_foK2vQ61xVc3zQv7CbeGEw,10120
|
60
|
-
sglang/srt/models/chatglm.py,sha256=pH8g2Dj8qQLGPYpWVTb-IONfXsdfmpWi0-IEYNdSi4s,13296
|
61
|
-
sglang/srt/models/commandr.py,sha256=hHsNQWi0X8rNL7_gpcoUxQxdhxtvx5_RVx8u6cLzqYQ,13606
|
62
|
-
sglang/srt/models/dbrx.py,sha256=rRxOusGPu670ommeqXg62AllwB1apzE4yZoWc1fcr2M,14095
|
63
|
-
sglang/srt/models/deepseek.py,sha256=YtoPmv4fKmiH_jsRMSab9Wxq3aOZga9pCPGnkCs3Vvs,15457
|
64
|
-
sglang/srt/models/deepseek_v2.py,sha256=1FqLe6tSENFpYgcEkmMr2-M4qksgne2glU3kZhSBB0Q,19527
|
65
|
-
sglang/srt/models/gemma.py,sha256=DweoalfWYhLL-ZWLAO5gl4SCZflWmejVeDG3Vky_WNo,11719
|
66
|
-
sglang/srt/models/gemma2.py,sha256=x3Dua-TVwRm5fJjo5UDekdoWqwt9xYbMuB-ogfXyiT8,15860
|
67
|
-
sglang/srt/models/gpt_bigcode.py,sha256=XHO1naPdXfiKYQRQ6uZe1fN3PBDhKH3-bchsaaZvfE4,9637
|
68
|
-
sglang/srt/models/grok.py,sha256=611zrlIchvFaVfztRdBY7z97oU3KB-anykbOZy1hK6M,27295
|
69
|
-
sglang/srt/models/internlm2.py,sha256=8MNcwxU5Th9IxWa314HqqmbCRlPUFScnfneBDs0riIU,11659
|
70
|
-
sglang/srt/models/llama2.py,sha256=OyAf_lun5aZEsT80WmrIYBF8QXTXRpW8sUlylr4AZIc,14204
|
71
|
-
sglang/srt/models/llama_classification.py,sha256=Z2dvZAdOwCnN-lGFZRcwU0rNreE1gKwLefeWzEH36Uw,4366
|
72
|
-
sglang/srt/models/llava.py,sha256=vBI6EEeOG_9o23Shi9h8k58rxTOHZnSKMmPl3B3Q3uc,17924
|
73
|
-
sglang/srt/models/llavavid.py,sha256=SrNQ-U2wekHvP_up-ZXRkCSros2NzheHpPfXHrp0YBU,13050
|
74
|
-
sglang/srt/models/minicpm.py,sha256=9uE8D-NopAj-sfaKJ7d-0x-PuCTEevQPoHPZvZlwstA,13277
|
75
|
-
sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
|
76
|
-
sglang/srt/models/mixtral.py,sha256=LWOIu3okC_30RWTy2Yh2xDjQzbiEBMEpZquleDMU1Y8,20831
|
77
|
-
sglang/srt/models/mixtral_quant.py,sha256=ObxdI5thDuy-7ljLMwWdmkuirhI1ESoA_h_mTYE5BE4,13656
|
78
|
-
sglang/srt/models/qwen.py,sha256=AUf9L6tkdFXn6VTlBariplMH7yM-o96JH0xLLoM4YgI,9440
|
79
|
-
sglang/srt/models/qwen2.py,sha256=87Tt1Bti-Py3AGudcf7k5ni-OHhtDKPj_Hke44YGw4U,11718
|
80
|
-
sglang/srt/models/qwen2_moe.py,sha256=oHNoo45myV5kitkls2GWVzuGt1Q4pRHN2nLlXEltFI8,17581
|
81
|
-
sglang/srt/models/stablelm.py,sha256=Z_XCDSHY_QMz3lZwwkZdIZjEOizZjLYJU9GDi8o08qQ,10802
|
82
|
-
sglang/srt/models/yivl.py,sha256=55KPrQ-dVplI0hh2WCSugjc1luE0J2UAafjZxu_7Xuc,4367
|
83
|
-
sglang/srt/openai_api/adapter.py,sha256=DVZ2niAEOgE8GQdYnuvwjrGiFRkAu5YtOB-yxOlF_Eg,15868
|
84
|
-
sglang/srt/openai_api/protocol.py,sha256=jTb22jv5caB7k7Ub2ltYEbTtDheZjwwWAAUdvjiLTR0,5741
|
85
|
-
sglang/test/test_conversation.py,sha256=gF_AyOxQgpPQBPnA57-kq-M0p_zFu-rBDMFgAq655Rw,1596
|
86
|
-
sglang/test/test_openai_protocol.py,sha256=DVx3r6hrb8oRqbo5AYIleldxbqMBTtb-gtORM6t_Y1c,1661
|
87
|
-
sglang/test/test_programs.py,sha256=s4WGpTmYP4Yx5g8JYZpbkeF9RN5iUnlKdi8FGAZovTc,13756
|
88
|
-
sglang/test/test_utils.py,sha256=kD_fQe3WroZ9Kc3NBRKPiZOFJ_JD2uEE9XIvPp6AD9Y,11048
|
89
|
-
sglang-0.2.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
90
|
-
sglang-0.2.6.dist-info/METADATA,sha256=g_G_XHbWCNSY9F6RieXV43svnNzq1wonwrArNxX0VNA,32095
|
91
|
-
sglang-0.2.6.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
92
|
-
sglang-0.2.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
93
|
-
sglang-0.2.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|