sglang 0.3.5__py3-none-any.whl → 0.3.5.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_serving.py +113 -3
- sglang/srt/configs/model_config.py +5 -2
- sglang/srt/constrained/__init__.py +2 -66
- sglang/srt/constrained/base_grammar_backend.py +72 -0
- sglang/srt/constrained/outlines_backend.py +165 -0
- sglang/srt/constrained/outlines_jump_forward.py +182 -0
- sglang/srt/constrained/xgrammar_backend.py +114 -0
- sglang/srt/layers/attention/triton_ops/decode_attention.py +7 -0
- sglang/srt/layers/attention/triton_ops/extend_attention.py +6 -0
- sglang/srt/layers/fused_moe/fused_moe.py +23 -7
- sglang/srt/layers/quantization/base_config.py +4 -6
- sglang/srt/layers/vocab_parallel_embedding.py +216 -150
- sglang/srt/managers/io_struct.py +5 -3
- sglang/srt/managers/schedule_batch.py +14 -20
- sglang/srt/managers/scheduler.py +153 -94
- sglang/srt/managers/tokenizer_manager.py +81 -17
- sglang/srt/metrics/collector.py +211 -0
- sglang/srt/metrics/func_timer.py +108 -0
- sglang/srt/mm_utils.py +1 -1
- sglang/srt/model_executor/cuda_graph_runner.py +2 -2
- sglang/srt/model_executor/forward_batch_info.py +7 -3
- sglang/srt/model_executor/model_runner.py +2 -1
- sglang/srt/models/gemma2_reward.py +69 -0
- sglang/srt/models/gpt2.py +31 -37
- sglang/srt/models/internlm2_reward.py +62 -0
- sglang/srt/models/llama.py +11 -6
- sglang/srt/models/llama_reward.py +5 -26
- sglang/srt/models/qwen2_vl.py +5 -7
- sglang/srt/openai_api/adapter.py +6 -2
- sglang/srt/sampling/sampling_batch_info.py +2 -3
- sglang/srt/sampling/sampling_params.py +0 -14
- sglang/srt/server.py +58 -16
- sglang/srt/server_args.py +42 -22
- sglang/srt/utils.py +87 -0
- sglang/test/simple_eval_common.py +1 -1
- sglang/test/simple_eval_humaneval.py +2 -2
- sglang/test/simple_eval_mgsm.py +2 -2
- sglang/test/test_utils.py +18 -4
- sglang/utils.py +1 -0
- sglang/version.py +1 -1
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/METADATA +11 -7
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/RECORD +45 -42
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/WHEEL +1 -1
- sglang/srt/constrained/base_tool_cache.py +0 -65
- sglang/srt/constrained/bnf_cache.py +0 -61
- sglang/srt/constrained/fsm_cache.py +0 -95
- sglang/srt/constrained/grammar.py +0 -190
- sglang/srt/constrained/jump_forward.py +0 -203
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/LICENSE +0 -0
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/top_level.txt +0 -0
@@ -1,203 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Copyright 2023-2024 SGLang Team
|
3
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
you may not use this file except in compliance with the License.
|
5
|
-
You may obtain a copy of the License at
|
6
|
-
|
7
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
|
9
|
-
Unless required by applicable law or agreed to in writing, software
|
10
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
See the License for the specific language governing permissions and
|
13
|
-
limitations under the License.
|
14
|
-
"""
|
15
|
-
|
16
|
-
"""
|
17
|
-
Faster constrained decoding.
|
18
|
-
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
|
19
|
-
"""
|
20
|
-
|
21
|
-
import dataclasses
|
22
|
-
import logging
|
23
|
-
from collections import defaultdict
|
24
|
-
|
25
|
-
import interegular
|
26
|
-
import outlines.caching
|
27
|
-
from interegular import InvalidSyntax
|
28
|
-
|
29
|
-
from sglang.srt.constrained import (
|
30
|
-
FSMInfo,
|
31
|
-
disk_cache,
|
32
|
-
make_byte_level_fsm,
|
33
|
-
make_deterministic_fsm,
|
34
|
-
)
|
35
|
-
from sglang.srt.constrained.base_tool_cache import BaseToolCache
|
36
|
-
|
37
|
-
# Dotted-quad IPv4 pattern (four octets in 0-255); used by the ``__main__`` demo below.
IP_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
|
38
|
-
|
39
|
-
logger = logging.getLogger(__name__)
|
40
|
-
|
41
|
-
|
42
|
-
@dataclasses.dataclass
class JumpEdge:
    """The single outgoing edge recorded for an FSM state.

    An edge carries a character-level view (``symbol`` and
    ``symbol_next_state``) and a byte-level view (``byte`` and
    ``byte_next_state``).  Every field defaults to ``None``, which means
    that view has not been filled in for the state.
    """

    # Character-level view: the character consumed and the state reached.
    symbol: str = None
    symbol_next_state: int = None
    # Byte-level view: the byte value consumed and the state reached.
    byte: int = None
    byte_next_state: int = None
|
48
|
-
|
49
|
-
|
50
|
-
class JumpForwardMap:
    """Table of FSM states that have exactly one way forward.

    The table is built from a regex.  A state is recorded only while it has
    a single countable outgoing transition; final states start with a count
    of one (they may terminate), so they are never recorded.  Each recorded
    state maps to a ``JumpEdge`` holding a character-level and/or a
    byte-level hop.

    Attributes:
        state_to_jump_forward: ``dict`` mapping state -> ``JumpEdge``, or
            ``None`` when the regex could not be parsed.
    """

    def __init__(self, regex_string):
        """Build the jump-forward table for ``regex_string``.

        If ``interegular`` rejects the pattern, a warning is logged and
        ``state_to_jump_forward`` is set to ``None``.
        """

        # NOTE(review): the nested function keeps its original name and
        # signature because ``disk_cache`` presumably derives its cache key
        # from them -- confirm before renaming or hoisting it.
        @disk_cache()
        def _init_state_to_jump_forward(regex_string):
            try:
                regex_pattern = interegular.parse_pattern(regex_string)
            except InvalidSyntax as e:
                logger.warning(f"skip invalid regex: {regex_string}, {e=}")
                # The caller stores this return value on the instance.
                return None

            byte_fsm = make_byte_level_fsm(
                regex_pattern.to_fsm().reduce(), keep_utf8=True
            )
            regex_fsm, _ = make_deterministic_fsm(byte_fsm)

            fsm_info: FSMInfo = regex_fsm.fsm_info

            # Invert symbol -> id into id -> [symbols].
            symbol_to_id = fsm_info.alphabet_symbol_mapping
            id_to_symbol = {}
            for symbol, id_ in symbol_to_id.items():
                id_to_symbol.setdefault(id_, []).append(symbol)

            transitions = fsm_info.transitions

            # NOTE(lsyin): Final states can lead to terminate, so they have
            # one outgoing edge naturally
            outgoings_ct = defaultdict(int, dict.fromkeys(fsm_info.finals, 1))

            # Pass 1: character-level edges.
            state_to_jump_forward = {}
            for (state, id_), next_state in transitions.items():
                if id_ == fsm_info.alphabet_anything_value:
                    # An arbitrary ("anything") symbol cannot be jumped over
                    continue

                symbols = id_to_symbol[id_]
                for c in symbols:
                    if len(c) > 1:
                        # Skip byte level transitions like c = "5E"
                        continue

                    outgoings_ct[state] += 1
                    if outgoings_ct[state] > 1:
                        # More than one choice: the state is not a jump state.
                        if state in state_to_jump_forward:
                            del state_to_jump_forward[state]
                        break

                    state_to_jump_forward[state] = JumpEdge(
                        symbol=c,
                        symbol_next_state=next_state,
                    )

            # Pass 2: byte-level edges, counted independently of pass 1.
            outgoings_ct = defaultdict(int, dict.fromkeys(fsm_info.finals, 1))

            for (state, id_), next_state in transitions.items():
                if id_ == fsm_info.alphabet_anything_value:
                    continue
                symbols = id_to_symbol[id_]
                for c in symbols:
                    byte_ = None
                    if len(c) == 1 and ord(c) < 0x80:
                        # ASCII character
                        byte_ = ord(c)
                    elif len(c) > 1:
                        # FIXME: This logic is due to the leading \x00
                        # https://github.com/outlines-dev/outlines/pull/930
                        # Parse the current symbol rather than symbols[0]:
                        # symbols[0] can be a one-character symbol, whose
                        # [1:] slice is empty and makes int() raise.
                        byte_ = int(c[1:], 16)

                    if byte_ is not None:
                        outgoings_ct[state] += 1
                        if outgoings_ct[state] > 1:
                            if state in state_to_jump_forward:
                                del state_to_jump_forward[state]
                            break
                        # Reuse the pass-1 edge when there is one.
                        e = state_to_jump_forward.get(state, JumpEdge())
                        e.byte = byte_
                        e.byte_next_state = next_state
                        state_to_jump_forward[state] = e

            return state_to_jump_forward

        self.state_to_jump_forward = _init_state_to_jump_forward(regex_string)

    def jump_forward_symbol(self, state):
        """Follow character-level edges starting at ``state``.

        Returns:
            ``(jump_forward_str, next_state)``: the concatenated symbols and
            the state reached.  If ``state`` has no character-level edge the
            result is ``("", state)``.
        """
        pieces = []
        next_state = state
        while state in self.state_to_jump_forward:
            e = self.state_to_jump_forward[state]
            if e.symbol is None:
                # Byte-only edge: the character-level run stops here.
                break
            pieces.append(e.symbol)
            next_state = e.symbol_next_state
            state = next_state

        return "".join(pieces), next_state

    def jump_forward_byte(self, state):
        """Follow byte-level edges starting at ``state``.

        Returns:
            ``None`` when ``state`` is not in the table, otherwise a list of
            ``(byte, next_state)`` pairs in traversal order.

        Raises:
            AssertionError: if an edge on the path has no byte-level data.
        """
        if state not in self.state_to_jump_forward:
            return None

        jump_forward_bytes = []
        while state in self.state_to_jump_forward:
            e = self.state_to_jump_forward[state]
            assert e.byte is not None and e.byte_next_state is not None
            jump_forward_bytes.append((e.byte, e.byte_next_state))
            state = e.byte_next_state

        return jump_forward_bytes

    def is_jump_forward_symbol_state(self, state):
        """Return whether ``state`` has a character-level edge in the table."""
        return (
            state in self.state_to_jump_forward
            and self.state_to_jump_forward[state].symbol is not None
        )
|
170
|
-
|
171
|
-
|
172
|
-
class JumpForwardCache(BaseToolCache):
    """``BaseToolCache`` subclass whose values are ``JumpForwardMap`` objects."""

    def __init__(self):
        super().__init__()

    def init_value(self, regex):
        """Build the jump-forward map for ``regex``.

        Returns the map when its table is non-empty; returns ``None`` when
        the table is empty or the regex could not be parsed.
        """
        candidate = JumpForwardMap(regex)
        return candidate if candidate.state_to_jump_forward else None
|
182
|
-
|
183
|
-
|
184
|
-
def test_main(regex_string):
    """Print the jump-forward runs found for ``regex_string``.

    For every state in the table this prints the character-level run (when
    the state has one) and then the byte-level run as hex values.
    """
    fmap = JumpForwardMap(regex_string)
    for start, edge in fmap.state_to_jump_forward.items():
        if edge.symbol is not None:
            text, end = fmap.jump_forward_symbol(start)
            print(f"{start} -> {end}", text)
        hops = fmap.jump_forward_byte(start)
        # The last pair holds the state reached at the end of the byte run.
        last_state = hops[-1][1]
        print(f"{start} -> {last_state}", [hex(value) for value, _ in hops])
|
192
|
-
|
193
|
-
|
194
|
-
if __name__ == "__main__":
    import outlines

    # Clear the outlines cache before the demo runs -- presumably so tables
    # cached by earlier runs are not reused; confirm against outlines docs.
    outlines.caching.clear_cache()

    demo_patterns = (
        r"The google's DNS sever address is " + IP_REGEX,
        # Two alternatives sharing their first character:
        # 霍格: \xe9\x9c\x8d \xe6\xa0\xbc ...
        # 霍比: \xe9\x9c\x8d \xe6\xaf\x94 ...
        r"霍格沃茨特快列车|霍比特人比尔博",
        r"[-+]?[0-9]+[ ]*",
    )
    for pattern in demo_patterns:
        test_main(pattern)
|
File without changes
|
File without changes
|