sglang 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +2 -2
- sglang/api.py +30 -4
- sglang/backend/litellm.py +2 -2
- sglang/backend/openai.py +26 -15
- sglang/backend/runtime_endpoint.py +18 -14
- sglang/bench_latency.py +317 -0
- sglang/global_config.py +5 -1
- sglang/lang/chat_template.py +41 -6
- sglang/lang/compiler.py +2 -2
- sglang/lang/interpreter.py +6 -2
- sglang/lang/ir.py +74 -28
- sglang/launch_server.py +4 -1
- sglang/launch_server_llavavid.py +2 -1
- sglang/srt/constrained/__init__.py +14 -6
- sglang/srt/constrained/fsm_cache.py +6 -3
- sglang/srt/constrained/jump_forward.py +113 -25
- sglang/srt/conversation.py +2 -0
- sglang/srt/flush_cache.py +2 -0
- sglang/srt/hf_transformers_utils.py +68 -9
- sglang/srt/layers/extend_attention.py +2 -1
- sglang/srt/layers/fused_moe.py +280 -169
- sglang/srt/layers/logits_processor.py +106 -42
- sglang/srt/layers/radix_attention.py +53 -29
- sglang/srt/layers/token_attention.py +4 -1
- sglang/srt/managers/controller/dp_worker.py +6 -3
- sglang/srt/managers/controller/infer_batch.py +144 -69
- sglang/srt/managers/controller/manager_multi.py +5 -5
- sglang/srt/managers/controller/manager_single.py +9 -4
- sglang/srt/managers/controller/model_runner.py +167 -55
- sglang/srt/managers/controller/radix_cache.py +4 -0
- sglang/srt/managers/controller/schedule_heuristic.py +2 -0
- sglang/srt/managers/controller/tp_worker.py +156 -134
- sglang/srt/managers/detokenizer_manager.py +19 -21
- sglang/srt/managers/io_struct.py +11 -5
- sglang/srt/managers/tokenizer_manager.py +16 -14
- sglang/srt/model_config.py +89 -4
- sglang/srt/models/chatglm.py +399 -0
- sglang/srt/models/commandr.py +2 -2
- sglang/srt/models/dbrx.py +1 -1
- sglang/srt/models/gemma.py +5 -1
- sglang/srt/models/gemma2.py +436 -0
- sglang/srt/models/grok.py +204 -137
- sglang/srt/models/llama2.py +12 -5
- sglang/srt/models/llama_classification.py +107 -0
- sglang/srt/models/llava.py +11 -8
- sglang/srt/models/llavavid.py +1 -1
- sglang/srt/models/minicpm.py +373 -0
- sglang/srt/models/mixtral.py +164 -115
- sglang/srt/models/mixtral_quant.py +0 -1
- sglang/srt/models/qwen.py +1 -1
- sglang/srt/models/qwen2.py +1 -1
- sglang/srt/models/qwen2_moe.py +454 -0
- sglang/srt/models/stablelm.py +1 -1
- sglang/srt/models/yivl.py +2 -2
- sglang/srt/openai_api_adapter.py +35 -25
- sglang/srt/openai_protocol.py +2 -2
- sglang/srt/server.py +69 -19
- sglang/srt/server_args.py +76 -43
- sglang/srt/utils.py +177 -35
- sglang/test/test_programs.py +28 -10
- sglang/utils.py +4 -3
- {sglang-0.1.17.dist-info → sglang-0.1.19.dist-info}/METADATA +44 -31
- sglang-0.1.19.dist-info/RECORD +81 -0
- {sglang-0.1.17.dist-info → sglang-0.1.19.dist-info}/WHEEL +1 -1
- sglang/srt/managers/router/infer_batch.py +0 -596
- sglang/srt/managers/router/manager.py +0 -82
- sglang/srt/managers/router/model_rpc.py +0 -818
- sglang/srt/managers/router/model_runner.py +0 -445
- sglang/srt/managers/router/radix_cache.py +0 -267
- sglang/srt/managers/router/scheduler.py +0 -59
- sglang-0.1.17.dist-info/RECORD +0 -81
- {sglang-0.1.17.dist-info → sglang-0.1.19.dist-info}/LICENSE +0 -0
- {sglang-0.1.17.dist-info → sglang-0.1.19.dist-info}/top_level.txt +0 -0
@@ -1,59 +0,0 @@
|
|
1
|
-
import random
|
2
|
-
from collections import defaultdict
|
3
|
-
|
4
|
-
|
5
|
-
class Scheduler:
|
6
|
-
def __init__(
|
7
|
-
self,
|
8
|
-
schedule_heuristic,
|
9
|
-
max_running_seqs,
|
10
|
-
max_prefill_num_tokens,
|
11
|
-
max_total_num_tokens,
|
12
|
-
tree_cache,
|
13
|
-
):
|
14
|
-
self.schedule_heuristic = schedule_heuristic
|
15
|
-
self.max_running_seqs = max_running_seqs
|
16
|
-
self.max_prefill_num_tokens = max_prefill_num_tokens
|
17
|
-
self.max_total_num_tokens = max_total_num_tokens
|
18
|
-
self.tree_cache = tree_cache
|
19
|
-
|
20
|
-
def get_priority_queue(self, forward_queue):
|
21
|
-
if self.schedule_heuristic == "lpm":
|
22
|
-
# longest prefix match
|
23
|
-
forward_queue.sort(key=lambda x: -len(x.prefix_indices))
|
24
|
-
return forward_queue
|
25
|
-
elif self.schedule_heuristic == "random":
|
26
|
-
random.shuffle(forward_queue)
|
27
|
-
return forward_queue
|
28
|
-
elif self.schedule_heuristic == "fcfs":
|
29
|
-
return forward_queue
|
30
|
-
elif self.schedule_heuristic == "dfs-weight":
|
31
|
-
last_node_to_reqs = defaultdict(list)
|
32
|
-
for req in forward_queue:
|
33
|
-
last_node_to_reqs[req.last_node].append(req)
|
34
|
-
|
35
|
-
node_to_weight = defaultdict(int)
|
36
|
-
for node in last_node_to_reqs:
|
37
|
-
node_to_weight[node] = len(last_node_to_reqs[node])
|
38
|
-
self.calc_weight(self.tree_cache.root_node, node_to_weight)
|
39
|
-
|
40
|
-
q = []
|
41
|
-
self.get_dfs_priority(
|
42
|
-
self.tree_cache.root_node, node_to_weight, last_node_to_reqs, q
|
43
|
-
)
|
44
|
-
assert len(q) == len(forward_queue)
|
45
|
-
return q
|
46
|
-
else:
|
47
|
-
raise ValueError(f"Unknown schedule_heuristic: {self.schedule_heuristic}")
|
48
|
-
|
49
|
-
def calc_weight(self, cur_node, node_to_weight):
|
50
|
-
for child in cur_node.children.values():
|
51
|
-
self.calc_weight(child, node_to_weight)
|
52
|
-
node_to_weight[cur_node] += node_to_weight[child]
|
53
|
-
|
54
|
-
def get_dfs_priority(self, cur_node, node_to_priority, last_node_to_reqs, q):
|
55
|
-
childs = [child for child in cur_node.children.values()]
|
56
|
-
childs.sort(key=lambda x: -node_to_priority[x])
|
57
|
-
for child in childs:
|
58
|
-
self.get_dfs_priority(child, node_to_priority, last_node_to_reqs, q)
|
59
|
-
q.extend(last_node_to_reqs[cur_node])
|
sglang-0.1.17.dist-info/RECORD
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
sglang/__init__.py,sha256=yEHUYdlMU-BtdYBBPSNKnqUTfQ4cdwWwWqA1BfLVB1M,1116
|
2
|
-
sglang/api.py,sha256=imnZeqgNmkex9Wg3B5VQ1M8FlBZzx9Wh9D0q5ibO0Bc,4548
|
3
|
-
sglang/global_config.py,sha256=Osa7UjpAXjEcULYvMUSa93JrvNP03vR0xLGy2gQ6uJw,1233
|
4
|
-
sglang/launch_server.py,sha256=jKPZRDN5bUe8Wgz5eoDkqeePhmKa8DLD4DpXQLT5auo,294
|
5
|
-
sglang/launch_server_llavavid.py,sha256=UWo_qUCJ9yknp1TVPzrz4B_aZtEuQpLQq0l96FMgynI,1058
|
6
|
-
sglang/utils.py,sha256=-IlcZtGHnOB4Gl_ltsQZPw9Epe7maUnXFTRtvMniw2k,8146
|
7
|
-
sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
-
sglang/backend/anthropic.py,sha256=iJjXiDMZbtvX2XNG78MG9kM7SpZq9hmXVuzT_T18elw,2076
|
9
|
-
sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
|
10
|
-
sglang/backend/litellm.py,sha256=Y8lfWN0z8_hKvLMJbl-Xuw7Yn_5drNusC_wJv4BOQUY,2439
|
11
|
-
sglang/backend/openai.py,sha256=Xv_QJc6tN5W1Da2fu3kzvrrfT9RvW921_Cwq8R_Ak9Y,14711
|
12
|
-
sglang/backend/runtime_endpoint.py,sha256=8NyWgMvhzUcA5VEsPLo1AacZ_UPVSnpxpzt6vYdVQSU,8871
|
13
|
-
sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
|
14
|
-
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
sglang/lang/chat_template.py,sha256=ogIT8iMlDcSEgcNBTh5pRLoCkdQI_ec5Hc27wFUFDIg,11532
|
16
|
-
sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
|
17
|
-
sglang/lang/interpreter.py,sha256=_QIzpnfSr02JUkeaJzTcZxxF4gv0naY16fvVkDZH9xE,29493
|
18
|
-
sglang/lang/ir.py,sha256=EMAXzC7upkx6qvKzCss8p7OSQYAXCT1hCl649s0Kp_c,13882
|
19
|
-
sglang/lang/tracer.py,sha256=QcslAObEjepk8XmiqCobwzWaDpihofEQXjeRs_3B8NQ,8282
|
20
|
-
sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
|
21
|
-
sglang/srt/flush_cache.py,sha256=N0etybT9tIS8_zreJFu64j9TYHKiR3sVXMTjHwHK8X0,382
|
22
|
-
sglang/srt/hf_transformers_utils.py,sha256=3aDNhwxaaObiMCrw9nqzBILoosIx1-Qy7COK6NIHtog,8244
|
23
|
-
sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
|
24
|
-
sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
|
25
|
-
sglang/srt/model_config.py,sha256=6XJHUtev-hI-E3NAIoWiNKtpZfN2hHoaxs_r79vGDe8,1724
|
26
|
-
sglang/srt/openai_api_adapter.py,sha256=BDUwhTQpFJHHnWsw4a9XsoGhEZkfgZqd3EUbkD5g5ko,15089
|
27
|
-
sglang/srt/openai_protocol.py,sha256=jChImDalBjYk9tzBccb_m5eVVJExdHm9LhCJ4Cso5LU,5350
|
28
|
-
sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
|
29
|
-
sglang/srt/server.py,sha256=O1lJq6F95ZHeVb4aantcE7SnnM3XM7JSCa6il8vf_Mg,11595
|
30
|
-
sglang/srt/server_args.py,sha256=N5sLrpLBL6Zkfspgvanl8-9bKhMSM2Lrv9gHJ8ENmLc,10822
|
31
|
-
sglang/srt/utils.py,sha256=pvyyPvJF6RnoR0DG0wSDo73mSS_2x2MhtKqVmXObtyA,14654
|
32
|
-
sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
|
33
|
-
sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
|
34
|
-
sglang/srt/constrained/fsm_cache.py,sha256=RmAdaAAXlh_KeDiK4w3AARiEnvrbsuELROBgMzJvZKk,967
|
35
|
-
sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
|
36
|
-
sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
|
37
|
-
sglang/srt/layers/extend_attention.py,sha256=JUYuYSAhfbgOXrwIK5YHJCXPq54a6IZ7vQrze-3VvMQ,12955
|
38
|
-
sglang/srt/layers/fused_moe.py,sha256=0JchWmMrqF4Dqn3_gcBcaS2_uypgmOiEE0vjfo-l24U,19484
|
39
|
-
sglang/srt/layers/logits_processor.py,sha256=96WMfpBAD-nQNq4cQ4edfhqqS3HuDkAIj42EWj_8Rwo,7283
|
40
|
-
sglang/srt/layers/radix_attention.py,sha256=xsF8G-jrXi076Xwk_7-eD-FbNJvDvGGH6Pk4EzMUduA,5818
|
41
|
-
sglang/srt/layers/token_attention.py,sha256=rVbPlFpmLoU3nx3qtK2YZdynDxfvMKtQNTPeKi0KNP0,8823
|
42
|
-
sglang/srt/managers/detokenizer_manager.py,sha256=XzhlONpgAQBPUWotCGJn6XnIA7YTm6JEmHxj0Zbn6_A,3452
|
43
|
-
sglang/srt/managers/io_struct.py,sha256=oWHLvrdszhY8y5pNlFtoVYDBVslEM-rMCegIfbFYOco,4370
|
44
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=uV8JuASF2pm95Hvit9dUF4y7juowp1aZ_Yl26Wh-mr0,14827
|
45
|
-
sglang/srt/managers/controller/dp_worker.py,sha256=xN7oQ3TG0FeX5K7nv6p3tUXCSE8wn0svdBHFePWe2ZU,3635
|
46
|
-
sglang/srt/managers/controller/infer_batch.py,sha256=sMjntty1MPDo__QzsxBVavMFeVIBlWU1x2lfRIP3Fmg,22716
|
47
|
-
sglang/srt/managers/controller/manager_multi.py,sha256=VmDkViOc3KFZA5HCcqC1mTmwuVda95NqELzMOrjNsp4,6629
|
48
|
-
sglang/srt/managers/controller/manager_single.py,sha256=CMaEl304o1SvNl3t-BpBrrQeyjmfdxNkKxlZh_c49sQ,3222
|
49
|
-
sglang/srt/managers/controller/model_runner.py,sha256=08HpdJYih-Nz_IlJ5a_53bb133ESEJ18Y_KSNJ0mTNQ,16993
|
50
|
-
sglang/srt/managers/controller/radix_cache.py,sha256=QnScfPDzy_QgZt0nM2BzDI_hDiohmDpJ8QKlAHAspxw,8127
|
51
|
-
sglang/srt/managers/controller/schedule_heuristic.py,sha256=DUNbv8DWSjk6I1pabfPGTYhZRz8vAFCsAh8IQcm1jxM,2276
|
52
|
-
sglang/srt/managers/controller/tp_worker.py,sha256=7qkDHURfeEPDSbUuN_-glwdgJ66H6dXd49yV8DT5JK0,31306
|
53
|
-
sglang/srt/managers/router/infer_batch.py,sha256=PEq_tCQNnmSDerlL6RRjJKadFwgP0r7l67OZypHq-II,22088
|
54
|
-
sglang/srt/managers/router/manager.py,sha256=3kTf05O2ADU91wIDoFpIZJXEz1dWeMKis0hn7j1dbzo,2693
|
55
|
-
sglang/srt/managers/router/model_rpc.py,sha256=-W-oWF1nOiWp7TwjTUo0DN4-mPdTK4S8noiVkLoQ-vo,31877
|
56
|
-
sglang/srt/managers/router/model_runner.py,sha256=PG7iSADgk_E1Eb60mS13Gl5MgHidEmi3YnO4k_Oz-7E,16515
|
57
|
-
sglang/srt/managers/router/radix_cache.py,sha256=QnScfPDzy_QgZt0nM2BzDI_hDiohmDpJ8QKlAHAspxw,8127
|
58
|
-
sglang/srt/managers/router/scheduler.py,sha256=od3fjTNyTjwTDbXVfT8jEHNPvNDk6Ss9NUUkIeXyq8s,2268
|
59
|
-
sglang/srt/models/commandr.py,sha256=JWjljtNr_t_L9PdPuymo6beUS0_EJ7NHZHrhKD3xoL0,13606
|
60
|
-
sglang/srt/models/dbrx.py,sha256=Wr45o_DTU1YTq3h5caTAH_1R3nYCSwRyKha64Ygl4Ak,14074
|
61
|
-
sglang/srt/models/gemma.py,sha256=rOw9WBNZqdeKfJT9wUa-y5sAj-pAj0YNfjk-dKtxEhA,11501
|
62
|
-
sglang/srt/models/grok.py,sha256=R_Y6CptcPgYvRt9YWob-LG2D3hTCa9VxjmA2k734Xlg,26944
|
63
|
-
sglang/srt/models/llama2.py,sha256=-IKmBoUDcZ76dRjMSNy0rUPB7NdDh4Ayc8skV0WlRCA,11959
|
64
|
-
sglang/srt/models/llava.py,sha256=S9Kz87les4Z_nZ2KAp1ZgmaK-ntILdZHqqqadJBLAt4,17893
|
65
|
-
sglang/srt/models/llavavid.py,sha256=8SVkICyDSvsw-5aSmGqSLT9S1xw8ouH0gJmAAeFLOPo,13029
|
66
|
-
sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
|
67
|
-
sglang/srt/models/mixtral.py,sha256=dDdwkxHOfZdtfr3CixjXIZwNmB5DBfZPSQGmdz2-cJQ,20727
|
68
|
-
sglang/srt/models/mixtral_quant.py,sha256=ZP5YfMaZUfthXwSO_84o6L6Be8RhJR-1-lvG5w42wis,13636
|
69
|
-
sglang/srt/models/qwen.py,sha256=5Q10AAzBy79SRtZinpnRQYJskjGst2jf4IhJBkmDtjE,9419
|
70
|
-
sglang/srt/models/qwen2.py,sha256=_7wLaaDEs_RUgS1cjC8wgk7JqJ6CngHPNTMsDdH5Yok,11465
|
71
|
-
sglang/srt/models/stablelm.py,sha256=rzkCKYC0mGg1geFTedcbtyoOFgr_s9HacYbdb_9XJMU,10781
|
72
|
-
sglang/srt/models/yivl.py,sha256=wHaoyC2JAvhWssfgwN84BRG8CND4d7TMj1Q-pzbDea8,4367
|
73
|
-
sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
|
74
|
-
sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
|
75
|
-
sglang/test/test_programs.py,sha256=HIfIEjO6fgBmbLIy4z4zpbz6oVw2GvHP8CeVQd69YDU,13378
|
76
|
-
sglang/test/test_utils.py,sha256=Mjn2btfmEQQ7rpsLfNo6VugXCPzUmRpNhssWvxevN4s,11038
|
77
|
-
sglang-0.1.17.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
78
|
-
sglang-0.1.17.dist-info/METADATA,sha256=AZQ36_LEiRR8Bf2AmS0qQMdFBmQK8boZwnlgFaLeoUg,29242
|
79
|
-
sglang-0.1.17.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
80
|
-
sglang-0.1.17.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
81
|
-
sglang-0.1.17.dist-info/RECORD,,
|
File without changes
|
File without changes
|