sglang 0.1.14__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +55 -2
- sglang/api.py +3 -5
- sglang/backend/anthropic.py +18 -4
- sglang/backend/openai.py +2 -1
- sglang/backend/runtime_endpoint.py +18 -5
- sglang/backend/vertexai.py +1 -0
- sglang/global_config.py +1 -0
- sglang/lang/chat_template.py +74 -0
- sglang/lang/interpreter.py +40 -16
- sglang/lang/tracer.py +6 -4
- sglang/launch_server.py +2 -1
- sglang/srt/constrained/fsm_cache.py +1 -0
- sglang/srt/constrained/jump_forward.py +1 -0
- sglang/srt/conversation.py +2 -2
- sglang/srt/hf_transformers_utils.py +2 -1
- sglang/srt/layers/context_flashattention_nopad.py +1 -0
- sglang/srt/layers/extend_attention.py +1 -0
- sglang/srt/layers/logits_processor.py +114 -54
- sglang/srt/layers/radix_attention.py +2 -1
- sglang/srt/layers/token_attention.py +1 -0
- sglang/srt/managers/detokenizer_manager.py +5 -1
- sglang/srt/managers/io_struct.py +12 -0
- sglang/srt/managers/router/infer_batch.py +70 -33
- sglang/srt/managers/router/manager.py +7 -2
- sglang/srt/managers/router/model_rpc.py +116 -73
- sglang/srt/managers/router/model_runner.py +111 -167
- sglang/srt/managers/router/radix_cache.py +46 -38
- sglang/srt/managers/tokenizer_manager.py +56 -11
- sglang/srt/memory_pool.py +5 -14
- sglang/srt/model_config.py +7 -0
- sglang/srt/models/commandr.py +376 -0
- sglang/srt/models/dbrx.py +413 -0
- sglang/srt/models/dbrx_config.py +281 -0
- sglang/srt/models/gemma.py +22 -20
- sglang/srt/models/llama2.py +23 -21
- sglang/srt/models/llava.py +12 -10
- sglang/srt/models/mixtral.py +27 -25
- sglang/srt/models/qwen.py +23 -21
- sglang/srt/models/qwen2.py +23 -21
- sglang/srt/models/stablelm.py +20 -21
- sglang/srt/models/yivl.py +6 -5
- sglang/srt/openai_api_adapter.py +356 -0
- sglang/srt/{managers/openai_protocol.py → openai_protocol.py} +36 -20
- sglang/srt/sampling_params.py +2 -0
- sglang/srt/server.py +68 -447
- sglang/srt/server_args.py +76 -49
- sglang/srt/utils.py +88 -32
- sglang/srt/weight_utils.py +402 -0
- sglang/test/test_programs.py +8 -7
- sglang/test/test_utils.py +195 -7
- {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/METADATA +12 -14
- sglang-0.1.15.dist-info/RECORD +69 -0
- sglang-0.1.14.dist-info/RECORD +0 -64
- {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/LICENSE +0 -0
- {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/WHEEL +0 -0
- {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
"""pydantic models for OpenAI API protocol"""
|
1
2
|
import time
|
2
3
|
from typing import Dict, List, Optional, Union
|
3
4
|
|
@@ -19,21 +20,24 @@ class UsageInfo(BaseModel):
|
|
19
20
|
|
20
21
|
|
21
22
|
class CompletionRequest(BaseModel):
|
23
|
+
# Ordered by official OpenAI API documentation
|
24
|
+
# https://platform.openai.com/docs/api-reference/completions/create
|
22
25
|
model: str
|
23
|
-
prompt: Union[str, List[str]]
|
24
|
-
|
25
|
-
max_tokens: Optional[int] = 16
|
26
|
-
temperature: Optional[float] = 0.7
|
27
|
-
top_p: Optional[float] = 1.0
|
28
|
-
n: Optional[int] = 1
|
29
|
-
stream: Optional[bool] = False
|
30
|
-
logprobs: Optional[int] = None
|
26
|
+
prompt: Union[List[int], List[List[int]], str, List[str]]
|
27
|
+
best_of: Optional[int] = None
|
31
28
|
echo: Optional[bool] = False
|
32
|
-
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
|
33
|
-
presence_penalty: Optional[float] = 0.0
|
34
29
|
frequency_penalty: Optional[float] = 0.0
|
35
|
-
best_of: Optional[int] = None
|
36
30
|
logit_bias: Optional[Dict[str, float]] = None
|
31
|
+
logprobs: Optional[int] = None
|
32
|
+
max_tokens: Optional[int] = 16
|
33
|
+
n: int = 1
|
34
|
+
presence_penalty: Optional[float] = 0.0
|
35
|
+
seed: Optional[int] = None
|
36
|
+
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
|
37
|
+
stream: Optional[bool] = False
|
38
|
+
suffix: Optional[str] = None
|
39
|
+
temperature: Optional[float] = 1.0
|
40
|
+
top_p: Optional[float] = 1.0
|
37
41
|
user: Optional[str] = None
|
38
42
|
|
39
43
|
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
|
@@ -107,20 +111,30 @@ ChatCompletionMessageParam = Union[
|
|
107
111
|
]
|
108
112
|
|
109
113
|
|
114
|
+
class ResponseFormat(BaseModel):
|
115
|
+
# type must be "json_object" or "text"
|
116
|
+
type: Literal["text", "json_object"]
|
117
|
+
|
118
|
+
|
110
119
|
class ChatCompletionRequest(BaseModel):
|
120
|
+
# Ordered by official OpenAI API documentation
|
121
|
+
# https://platform.openai.com/docs/api-reference/chat/create
|
122
|
+
messages: List[ChatCompletionMessageParam]
|
111
123
|
model: str
|
112
|
-
|
113
|
-
|
114
|
-
|
124
|
+
frequency_penalty: Optional[float] = 0.0
|
125
|
+
logit_bias: Optional[Dict[str, float]] = None
|
126
|
+
logprobs: Optional[bool] = False
|
127
|
+
top_logprobs: Optional[int] = None
|
128
|
+
max_tokens: Optional[int] = None
|
115
129
|
n: Optional[int] = 1
|
116
|
-
|
130
|
+
presence_penalty: Optional[float] = 0.0
|
131
|
+
response_format: Optional[ResponseFormat] = None
|
132
|
+
seed: Optional[int] = None
|
117
133
|
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
|
118
134
|
stream: Optional[bool] = False
|
119
|
-
|
120
|
-
|
121
|
-
logit_bias: Optional[Dict[str, float]] = None
|
135
|
+
temperature: Optional[float] = 0.7
|
136
|
+
top_p: Optional[float] = 1.0
|
122
137
|
user: Optional[str] = None
|
123
|
-
best_of: Optional[int] = None
|
124
138
|
|
125
139
|
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
|
126
140
|
regex: Optional[str] = None
|
@@ -134,6 +148,7 @@ class ChatMessage(BaseModel):
|
|
134
148
|
class ChatCompletionResponseChoice(BaseModel):
|
135
149
|
index: int
|
136
150
|
message: ChatMessage
|
151
|
+
logprobs: Optional[LogProbs] = None
|
137
152
|
finish_reason: Optional[str] = None
|
138
153
|
|
139
154
|
|
@@ -154,6 +169,7 @@ class DeltaMessage(BaseModel):
|
|
154
169
|
class ChatCompletionResponseStreamChoice(BaseModel):
|
155
170
|
index: int
|
156
171
|
delta: DeltaMessage
|
172
|
+
logprobs: Optional[LogProbs] = None
|
157
173
|
finish_reason: Optional[str] = None
|
158
174
|
|
159
175
|
|
@@ -162,4 +178,4 @@ class ChatCompletionStreamResponse(BaseModel):
|
|
162
178
|
object: str = "chat.completion.chunk"
|
163
179
|
created: int = Field(default_factory=lambda: int(time.time()))
|
164
180
|
model: str
|
165
|
-
choices: List[ChatCompletionResponseStreamChoice]
|
181
|
+
choices: List[ChatCompletionResponseStreamChoice]
|
sglang/srt/sampling_params.py
CHANGED
@@ -17,6 +17,7 @@ class SamplingParams:
|
|
17
17
|
presence_penalty: float = 0.0,
|
18
18
|
ignore_eos: bool = False,
|
19
19
|
skip_special_tokens: bool = True,
|
20
|
+
spaces_between_special_tokens: bool = True,
|
20
21
|
dtype: Optional[str] = None,
|
21
22
|
regex: Optional[str] = None,
|
22
23
|
) -> None:
|
@@ -29,6 +30,7 @@ class SamplingParams:
|
|
29
30
|
self.max_new_tokens = max_new_tokens
|
30
31
|
self.ignore_eos = ignore_eos
|
31
32
|
self.skip_special_tokens = skip_special_tokens
|
33
|
+
self.spaces_between_special_tokens = spaces_between_special_tokens
|
32
34
|
self.dtype = dtype
|
33
35
|
self.regex = regex
|
34
36
|
|