sglang 0.3.5__py3-none-any.whl → 0.3.5.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. sglang/bench_serving.py +113 -3
  2. sglang/srt/configs/model_config.py +5 -2
  3. sglang/srt/constrained/__init__.py +2 -66
  4. sglang/srt/constrained/base_grammar_backend.py +72 -0
  5. sglang/srt/constrained/outlines_backend.py +165 -0
  6. sglang/srt/constrained/outlines_jump_forward.py +182 -0
  7. sglang/srt/constrained/xgrammar_backend.py +114 -0
  8. sglang/srt/layers/attention/triton_ops/decode_attention.py +7 -0
  9. sglang/srt/layers/attention/triton_ops/extend_attention.py +6 -0
  10. sglang/srt/layers/fused_moe/fused_moe.py +23 -7
  11. sglang/srt/layers/quantization/base_config.py +4 -6
  12. sglang/srt/layers/vocab_parallel_embedding.py +216 -150
  13. sglang/srt/managers/io_struct.py +5 -3
  14. sglang/srt/managers/schedule_batch.py +14 -20
  15. sglang/srt/managers/scheduler.py +153 -94
  16. sglang/srt/managers/tokenizer_manager.py +81 -17
  17. sglang/srt/metrics/collector.py +211 -0
  18. sglang/srt/metrics/func_timer.py +108 -0
  19. sglang/srt/mm_utils.py +1 -1
  20. sglang/srt/model_executor/cuda_graph_runner.py +2 -2
  21. sglang/srt/model_executor/forward_batch_info.py +7 -3
  22. sglang/srt/model_executor/model_runner.py +2 -1
  23. sglang/srt/models/gemma2_reward.py +69 -0
  24. sglang/srt/models/gpt2.py +31 -37
  25. sglang/srt/models/internlm2_reward.py +62 -0
  26. sglang/srt/models/llama.py +11 -6
  27. sglang/srt/models/llama_reward.py +5 -26
  28. sglang/srt/models/qwen2_vl.py +5 -7
  29. sglang/srt/openai_api/adapter.py +6 -2
  30. sglang/srt/sampling/sampling_batch_info.py +2 -3
  31. sglang/srt/sampling/sampling_params.py +0 -14
  32. sglang/srt/server.py +58 -16
  33. sglang/srt/server_args.py +42 -22
  34. sglang/srt/utils.py +87 -0
  35. sglang/test/simple_eval_common.py +1 -1
  36. sglang/test/simple_eval_humaneval.py +2 -2
  37. sglang/test/simple_eval_mgsm.py +2 -2
  38. sglang/test/test_utils.py +18 -4
  39. sglang/utils.py +1 -0
  40. sglang/version.py +1 -1
  41. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/METADATA +11 -7
  42. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/RECORD +45 -42
  43. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/WHEEL +1 -1
  44. sglang/srt/constrained/base_tool_cache.py +0 -65
  45. sglang/srt/constrained/bnf_cache.py +0 -61
  46. sglang/srt/constrained/fsm_cache.py +0 -95
  47. sglang/srt/constrained/grammar.py +0 -190
  48. sglang/srt/constrained/jump_forward.py +0 -203
  49. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/LICENSE +0 -0
  50. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/top_level.txt +0 -0
@@ -1,203 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
14
- """
15
-
16
- """
17
- Faster constrained decoding.
18
- Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
19
- """
20
-
21
- import dataclasses
22
- import logging
23
- from collections import defaultdict
24
-
25
- import interegular
26
- import outlines.caching
27
- from interegular import InvalidSyntax
28
-
29
- from sglang.srt.constrained import (
30
- FSMInfo,
31
- disk_cache,
32
- make_byte_level_fsm,
33
- make_deterministic_fsm,
34
- )
35
- from sglang.srt.constrained.base_tool_cache import BaseToolCache
36
-
37
- IP_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
38
-
39
- logger = logging.getLogger(__name__)
40
-
41
-
42
- @dataclasses.dataclass
43
- class JumpEdge:
44
- symbol: str = None
45
- symbol_next_state: int = None
46
- byte: int = None
47
- byte_next_state: int = None
48
-
49
-
50
- class JumpForwardMap:
51
- def __init__(self, regex_string):
52
- @disk_cache()
53
- def _init_state_to_jump_forward(regex_string):
54
- try:
55
- regex_pattern = interegular.parse_pattern(regex_string)
56
- except InvalidSyntax as e:
57
- logger.warning(f"skip invalid regex: {regex_string}, {e=}")
58
- self.state_to_jump_forward = None
59
- return
60
-
61
- byte_fsm = make_byte_level_fsm(
62
- regex_pattern.to_fsm().reduce(), keep_utf8=True
63
- )
64
- regex_fsm, _ = make_deterministic_fsm(byte_fsm)
65
-
66
- fsm_info: FSMInfo = regex_fsm.fsm_info
67
-
68
- symbol_to_id = fsm_info.alphabet_symbol_mapping
69
- id_to_symbol = {}
70
- for symbol, id_ in symbol_to_id.items():
71
- id_to_symbol.setdefault(id_, []).append(symbol)
72
-
73
- transitions = fsm_info.transitions
74
-
75
- outgoings_ct = defaultdict(int)
76
- # NOTE(lsyin): Final states can lead to terminate, so they have one outgoing edge naturally
77
- for s in fsm_info.finals:
78
- outgoings_ct[s] = 1
79
-
80
- state_to_jump_forward = {}
81
- for (state, id_), next_state in transitions.items():
82
- if id_ == fsm_info.alphabet_anything_value:
83
- # Arbitrarily symbol cannot be recognized as jump forward
84
- continue
85
-
86
- symbols = id_to_symbol[id_]
87
- for c in symbols:
88
- if len(c) > 1:
89
- # Skip byte level transitions like c = "5E"
90
- continue
91
-
92
- outgoings_ct[state] += 1
93
- if outgoings_ct[state] > 1:
94
- if state in state_to_jump_forward:
95
- del state_to_jump_forward[state]
96
- break
97
-
98
- state_to_jump_forward[state] = JumpEdge(
99
- symbol=c,
100
- symbol_next_state=next_state,
101
- )
102
-
103
- # Process the byte level jump forward
104
- outgoings_ct = defaultdict(int)
105
- for s in fsm_info.finals:
106
- outgoings_ct[s] = 1
107
-
108
- for (state, id_), next_state in transitions.items():
109
- if id_ == fsm_info.alphabet_anything_value:
110
- continue
111
- symbols = id_to_symbol[id_]
112
- for c in symbols:
113
- byte_ = None
114
- if len(c) == 1 and ord(c) < 0x80:
115
- # ASCII character
116
- byte_ = ord(c)
117
- elif len(c) > 1:
118
- # FIXME: This logic is due to the leading \x00
119
- # https://github.com/outlines-dev/outlines/pull/930
120
- byte_ = int(symbols[0][1:], 16)
121
-
122
- if byte_ is not None:
123
- outgoings_ct[state] += 1
124
- if outgoings_ct[state] > 1:
125
- if state in state_to_jump_forward:
126
- del state_to_jump_forward[state]
127
- break
128
- e = state_to_jump_forward.get(state, JumpEdge())
129
- e.byte = byte_
130
- e.byte_next_state = next_state
131
- state_to_jump_forward[state] = e
132
-
133
- return state_to_jump_forward
134
-
135
- self.state_to_jump_forward = _init_state_to_jump_forward(regex_string)
136
-
137
- def jump_forward_symbol(self, state):
138
- jump_forward_str = ""
139
- next_state = state
140
- while state in self.state_to_jump_forward:
141
- e = self.state_to_jump_forward[state]
142
- if e.symbol is None:
143
- break
144
- jump_forward_str += e.symbol
145
- next_state = e.symbol_next_state
146
- state = next_state
147
-
148
- return jump_forward_str, next_state
149
-
150
- def jump_forward_byte(self, state):
151
- if state not in self.state_to_jump_forward:
152
- return None
153
-
154
- jump_forward_bytes = []
155
- next_state = None
156
- while state in self.state_to_jump_forward:
157
- e = self.state_to_jump_forward[state]
158
- assert e.byte is not None and e.byte_next_state is not None
159
- jump_forward_bytes.append((e.byte, e.byte_next_state))
160
- next_state = e.byte_next_state
161
- state = next_state
162
-
163
- return jump_forward_bytes
164
-
165
- def is_jump_forward_symbol_state(self, state):
166
- return (
167
- state in self.state_to_jump_forward
168
- and self.state_to_jump_forward[state].symbol is not None
169
- )
170
-
171
-
172
- class JumpForwardCache(BaseToolCache):
173
- def __init__(self):
174
- super().__init__()
175
-
176
- def init_value(self, regex):
177
- forward_map = JumpForwardMap(regex)
178
- if forward_map.state_to_jump_forward:
179
- return forward_map
180
- else:
181
- return None
182
-
183
-
184
- def test_main(regex_string):
185
- jump_forward_map = JumpForwardMap(regex_string)
186
- for state, e in jump_forward_map.state_to_jump_forward.items():
187
- if e.symbol is not None:
188
- jump_forward_str, next_state = jump_forward_map.jump_forward_symbol(state)
189
- print(f"{state} -> {next_state}", jump_forward_str)
190
- bytes_ = jump_forward_map.jump_forward_byte(state)
191
- print(f"{state} -> {bytes_[-1][1]}", [hex(b) for b, _ in bytes_])
192
-
193
-
194
- if __name__ == "__main__":
195
- import outlines
196
-
197
- outlines.caching.clear_cache()
198
- test_main(r"The google's DNS sever address is " + IP_REGEX)
199
- test_main(r"霍格沃茨特快列车|霍比特人比尔博")
200
- # 霍格: \xe9\x9c\x8d \xe6\xa0\xbc ...
201
- # 霍比: \xe9\x9c\x8d \xe6\xaf\x94 ...
202
-
203
- test_main(r"[-+]?[0-9]+[ ]*")