sglang 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +5 -1
- sglang/api.py +8 -3
- sglang/backend/anthropic.py +1 -1
- sglang/backend/litellm.py +90 -0
- sglang/backend/openai.py +148 -12
- sglang/backend/runtime_endpoint.py +18 -10
- sglang/global_config.py +11 -1
- sglang/lang/chat_template.py +9 -2
- sglang/lang/interpreter.py +161 -81
- sglang/lang/ir.py +29 -11
- sglang/lang/tracer.py +1 -1
- sglang/launch_server.py +1 -2
- sglang/launch_server_llavavid.py +31 -0
- sglang/srt/constrained/fsm_cache.py +3 -0
- sglang/srt/flush_cache.py +16 -0
- sglang/srt/hf_transformers_utils.py +83 -2
- sglang/srt/layers/extend_attention.py +17 -0
- sglang/srt/layers/fused_moe.py +485 -0
- sglang/srt/layers/logits_processor.py +12 -7
- sglang/srt/layers/radix_attention.py +10 -3
- sglang/srt/layers/token_attention.py +16 -1
- sglang/srt/managers/controller/dp_worker.py +110 -0
- sglang/srt/managers/controller/infer_batch.py +619 -0
- sglang/srt/managers/controller/manager_multi.py +191 -0
- sglang/srt/managers/controller/manager_single.py +97 -0
- sglang/srt/managers/controller/model_runner.py +462 -0
- sglang/srt/managers/controller/radix_cache.py +267 -0
- sglang/srt/managers/controller/schedule_heuristic.py +59 -0
- sglang/srt/managers/controller/tp_worker.py +791 -0
- sglang/srt/managers/detokenizer_manager.py +45 -45
- sglang/srt/managers/io_struct.py +26 -10
- sglang/srt/managers/router/infer_batch.py +130 -74
- sglang/srt/managers/router/manager.py +7 -9
- sglang/srt/managers/router/model_rpc.py +224 -135
- sglang/srt/managers/router/model_runner.py +94 -107
- sglang/srt/managers/router/radix_cache.py +54 -18
- sglang/srt/managers/router/scheduler.py +23 -34
- sglang/srt/managers/tokenizer_manager.py +183 -88
- sglang/srt/model_config.py +5 -2
- sglang/srt/models/commandr.py +15 -22
- sglang/srt/models/dbrx.py +22 -29
- sglang/srt/models/gemma.py +14 -24
- sglang/srt/models/grok.py +671 -0
- sglang/srt/models/llama2.py +24 -23
- sglang/srt/models/llava.py +85 -25
- sglang/srt/models/llavavid.py +298 -0
- sglang/srt/models/mixtral.py +254 -130
- sglang/srt/models/mixtral_quant.py +373 -0
- sglang/srt/models/qwen.py +28 -25
- sglang/srt/models/qwen2.py +17 -22
- sglang/srt/models/stablelm.py +21 -26
- sglang/srt/models/yivl.py +17 -25
- sglang/srt/openai_api_adapter.py +140 -95
- sglang/srt/openai_protocol.py +10 -1
- sglang/srt/server.py +101 -52
- sglang/srt/server_args.py +59 -11
- sglang/srt/utils.py +242 -75
- sglang/test/test_programs.py +44 -0
- sglang/test/test_utils.py +32 -1
- sglang/utils.py +95 -26
- {sglang-0.1.15.dist-info → sglang-0.1.17.dist-info}/METADATA +23 -13
- sglang-0.1.17.dist-info/RECORD +81 -0
- sglang/srt/backend_config.py +0 -13
- sglang/srt/models/dbrx_config.py +0 -281
- sglang/srt/weight_utils.py +0 -402
- sglang-0.1.15.dist-info/RECORD +0 -69
- {sglang-0.1.15.dist-info → sglang-0.1.17.dist-info}/LICENSE +0 -0
- {sglang-0.1.15.dist-info → sglang-0.1.17.dist-info}/WHEEL +0 -0
- {sglang-0.1.15.dist-info → sglang-0.1.17.dist-info}/top_level.txt +0 -0
sglang/srt/models/yivl.py
CHANGED
```diff
@@ -1,43 +1,38 @@
 """Inference-only Yi-VL model."""

-import
-from typing import List, Optional
+from typing import Tuple, Iterable, Optional

 import torch
 import torch.nn as nn
 from transformers import CLIPVisionModel, LlavaConfig
-from
-
-    hf_model_weights_iterator,
-)
+from vllm.config import CacheConfig
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader

+from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.models.llava import (
     LlavaLlamaForCausalLM,
-    clip_vision_embed_forward,
     monkey_path_clip_vision_embed_forward,
 )


 class YiVLForCausalLM(LlavaLlamaForCausalLM):
-    def __init__(
-        self
-
+    def __init__(
+        self,
+        config: LlavaConfig,
+        quant_config: Optional[QuantizationConfig] = None,
+        cache_config: Optional[CacheConfig] = None,
+    ) -> None:
+        super().__init__(config, quant_config, cache_config)

         self.multi_modal_projector = YiVLMultiModalProjector(self.config)
         self.vision_tower_subfolder = self.config.mm_vision_tower.replace(
             "./", ""
         )  # Everything after "./"

-    def load_weights(
-        self,
-        model_name_or_path: str,
-        cache_dir: Optional[str] = None,
-        load_format: str = "auto",
-        revision: Optional[str] = None,
-    ):
+    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         # We have to use the subfolder of the main model directory (e.g. 01-ai/Yi-VL-6B)
         self.vision_tower = CLIPVisionModel.from_pretrained(
-
+            self.config._name_or_path,
             torch_dtype=torch.float16,
             subfolder=self.vision_tower_subfolder,
         ).cuda()
@@ -71,9 +66,8 @@ class YiVLForCausalLM(LlavaLlamaForCausalLM):
             "model.vision_tower.vision_tower": "vision_tower",  # Update the vision tower weights if we find them in the checkpoint (it may be finetuned).
         }
         params_dict = dict(self.named_parameters())
-
-
-        ):
+        weights = list(weights)
+        for name, loaded_weight in weights:
             if "projector" in name or "vision_tower" in name:
                 for weight_name, param_name in projector_weights.items():
                     if weight_name in name:
@@ -83,9 +77,7 @@ class YiVLForCausalLM(LlavaLlamaForCausalLM):
                     weight_loader(param, loaded_weight)

         # load language model
-        self.language_model.load_weights(
-            model_name_or_path, cache_dir, load_format, revision
-        )
+        self.language_model.load_weights(weights)

         monkey_path_clip_vision_embed_forward()

@@ -106,7 +98,7 @@ class YiVLMultiModalProjector(nn.Module):

     def forward(self, image_features):
         hidden_states = self.linear_1(image_features)
-
+        hidden_states = self.ln_1(hidden_states)
         hidden_states = self.act(hidden_states)
         hidden_states = self.linear_2(hidden_states)
         hidden_states = self.ln_2(hidden_states)
```
sglang/srt/openai_api_adapter.py
CHANGED
```diff
@@ -1,9 +1,12 @@
 """Conversion between OpenAI APIs and native SRT APIs"""
+
+import asyncio
 import json
 import os
+from http import HTTPStatus

-from fastapi import
-from fastapi.responses import StreamingResponse
+from fastapi import Request
+from fastapi.responses import StreamingResponse, JSONResponse

 from sglang.srt.conversation import (
     Conversation,
@@ -26,14 +29,36 @@ from sglang.srt.openai_protocol import (
     CompletionResponseStreamChoice,
     CompletionStreamResponse,
     DeltaMessage,
+    ErrorResponse,
     LogProbs,
     UsageInfo,
 )
-from sglang.srt.utils import jsonify_pydantic_model
-

 chat_template_name = None

+
+def create_error_response(
+        message: str,
+        err_type: str = "BadRequestError",
+        status_code: HTTPStatus = HTTPStatus.BAD_REQUEST):
+    error = ErrorResponse(message=message,
+                          type=err_type,
+                          code=status_code.value)
+    return JSONResponse(content=error.model_dump(),
+                        status_code=error.code)
+
+
+def create_streaming_error_response(
+        message: str,
+        err_type: str = "BadRequestError",
+        status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> str:
+    error = ErrorResponse(message=message,
+                          type=err_type,
+                          code=status_code.value)
+    json_str = json.dumps({"error": error.model_dump()})
+    return json_str
+
+
 def load_chat_template_for_openai_api(chat_template_arg):
     global chat_template_name

@@ -73,8 +98,8 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
     request_json = await raw_request.json()
     request = CompletionRequest(**request_json)

-
-
+    if request.n != 1:
+        return create_error_response("n != 1 is not supported")

     adapted_request = GenerateReqInput(
         text=request.prompt,
@@ -92,79 +117,88 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
         return_text_in_logprobs=True,
         stream=request.stream,
     )
-    adapted_request.post_init()

     if adapted_request.stream:

         async def generate_stream_resp():
             stream_buffer = ""
             n_prev_token = 0
-
-
-
-
-
-
-
-
-
-
-
-
-            if
-
-
-
-
-
-
+            try:
+                async for content in tokenizer_manager.generate_request(
+                        adapted_request, raw_request):
+                    text = content["text"]
+                    prompt_tokens = content["meta_info"]["prompt_tokens"]
+                    completion_tokens = content["meta_info"]["completion_tokens"]
+
+                    if not stream_buffer:  # The first chunk
+                        if request.echo:
+                            # Prepend prompt in response text.
+                            text = request.prompt + text
+
+                    if request.logprobs:
+                        # The first chunk and echo is enabled.
+                        if not stream_buffer and request.echo:
+                            prefill_token_logprobs = content["meta_info"][
+                                "prefill_token_logprobs"
+                            ]
+                            prefill_top_logprobs = content["meta_info"][
+                                "prefill_top_logprobs"
+                            ]
+                        else:
+                            prefill_token_logprobs = None
+                            prefill_top_logprobs = None
+
+                        logprobs = to_openai_style_logprobs(
+                            prefill_token_logprobs=prefill_token_logprobs,
+                            prefill_top_logprobs=prefill_top_logprobs,
+                            decode_token_logprobs=content["meta_info"][
+                                "decode_token_logprobs"
+                            ][n_prev_token:],
+                            decode_top_logprobs=content["meta_info"]["decode_top_logprobs"][
+                                n_prev_token:
+                            ],
+                        )
+
+                        n_prev_token = len(content["meta_info"]["decode_token_logprobs"])
                     else:
-
-                    prefill_top_logprobs = None
-
-                    logprobs = to_openai_style_logprobs(
-                        prefill_token_logprobs=prefill_token_logprobs,
-                        prefill_top_logprobs=prefill_top_logprobs,
-                        decode_token_logprobs=content["meta_info"][
-                            "decode_token_logprobs"
-                        ][n_prev_token:],
-                        decode_top_logprobs=content["meta_info"]["decode_top_logprobs"][
-                            n_prev_token:
-                        ],
-                    )
+                        logprobs = None

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
+                    delta = text[len(stream_buffer) :]
+                    stream_buffer = content["text"]
+                    choice_data = CompletionResponseStreamChoice(
+                        index=0,
+                        text=delta,
+                        logprobs=logprobs,
+                        finish_reason=content["meta_info"]["finish_reason"],
+                    )
+                    chunk = CompletionStreamResponse(
+                        id=content["meta_info"]["id"],
+                        object="text_completion",
+                        choices=[choice_data],
+                        model=request.model,
+                        usage=UsageInfo(
+                            prompt_tokens=prompt_tokens,
+                            completion_tokens=completion_tokens,
+                            total_tokens=prompt_tokens + completion_tokens,
+                        ),
+                    )
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+            except ValueError as e:
+                error = create_streaming_error_response(str(e))
+                yield f"data: {error}\n\n"
             yield "data: [DONE]\n\n"

-        return StreamingResponse(generate_stream_resp(), media_type="text/event-stream"
+        return StreamingResponse(generate_stream_resp(), media_type="text/event-stream",
+                                 background=tokenizer_manager.create_abort_task(adapted_request))

     # Non-streaming response.
-
-
+    try:
+        ret = await tokenizer_manager.generate_request(
+            adapted_request, raw_request).__anext__()
+    except ValueError as e:
+        return create_error_response(str(e))

+    ret = ret[0] if isinstance(ret, list) else ret
     prompt_tokens = ret["meta_info"]["prompt_tokens"]
     completion_tokens = ret["meta_info"]["completion_tokens"]
     text = ret["text"]
@@ -192,7 +226,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
             index=0,
             text=text,
             logprobs=logprobs,
-            finish_reason=
+            finish_reason=ret["meta_info"]["finish_reason"],
         )
     response = CompletionResponse(
         id=ret["meta_info"]["id"],
@@ -211,8 +245,8 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
     request_json = await raw_request.json()
     request = ChatCompletionRequest(**request_json)

-
-
+    if request.n != 1:
+        return create_error_response("n != 1 is not supported")

     # Prep the data needed for the underlying GenerateReqInput:
     #  - prompt: The full prompt string.
@@ -257,7 +291,6 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
         },
         stream=request.stream,
     )
-    adapted_request.post_init()

     if adapted_request.stream:

@@ -265,46 +298,58 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
             is_first = True

             stream_buffer = ""
-
-
-
-
+            try:
+                async for content in tokenizer_manager.generate_request(adapted_request, raw_request):
+                    if is_first:
+                        # First chunk with role
+                        is_first = False
+                        choice_data = ChatCompletionResponseStreamChoice(
+                            index=0,
+                            delta=DeltaMessage(role="assistant"),
+                            finish_reason=content["meta_info"]["finish_reason"],
+                        )
+                        chunk = ChatCompletionStreamResponse(
+                            id=content["meta_info"]["id"],
+                            choices=[choice_data],
+                            model=request.model,
+                        )
+                        yield f"data: {chunk.model_dump_json()}\n\n"
+
+                    text = content["text"]
+                    delta = text[len(stream_buffer) :]
+                    stream_buffer = text
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=0,
-                        delta=DeltaMessage(
-                        finish_reason=
+                        delta=DeltaMessage(content=delta),
+                        finish_reason=content["meta_info"]["finish_reason"],
                     )
                     chunk = ChatCompletionStreamResponse(
                         id=content["meta_info"]["id"],
                         choices=[choice_data],
                         model=request.model,
                     )
-                    yield f"data: {
-
-
-
-                    stream_buffer = text
-                    choice_data = ChatCompletionResponseStreamChoice(
-                        index=0, delta=DeltaMessage(content=delta), finish_reason=None
-                    )
-                    chunk = ChatCompletionStreamResponse(
-                        id=content["meta_info"]["id"],
-                        choices=[choice_data],
-                        model=request.model,
-                    )
-                    yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+            except ValueError as e:
+                error = create_streaming_error_response(str(e))
+                yield f"data: {error}\n\n"
             yield "data: [DONE]\n\n"

-        return StreamingResponse(generate_stream_resp(), media_type="text/event-stream"
+        return StreamingResponse(generate_stream_resp(), media_type="text/event-stream",
+                                 background=tokenizer_manager.create_abort_task(adapted_request))

     # Non-streaming response.
-
+    try:
+        ret = await tokenizer_manager.generate_request(
+            adapted_request, raw_request).__anext__()
+    except ValueError as e:
+        return create_error_response(str(e))
+
     prompt_tokens = ret["meta_info"]["prompt_tokens"]
     completion_tokens = ret["meta_info"]["completion_tokens"]
     choice_data = ChatCompletionResponseChoice(
         index=0,
         message=ChatMessage(role="assistant", content=ret["text"]),
-        finish_reason=
+        finish_reason=ret["meta_info"]["finish_reason"],
     )
     response = ChatCompletionResponse(
         id=ret["meta_info"]["id"],
@@ -332,7 +377,7 @@ def to_openai_style_logprobs(
         ret_logprobs.tokens.append(token_text)
         ret_logprobs.token_logprobs.append(logprob)

-        # Not
+        # Not supported yet
         ret_logprobs.text_offset.append(-1)

     def append_top_logprobs(top_logprobs):
@@ -353,4 +398,4 @@ def to_openai_style_logprobs(
     if decode_top_logprobs is not None:
         append_top_logprobs(decode_top_logprobs)

-    return ret_logprobs
+    return ret_logprobs
```
sglang/srt/openai_protocol.py
CHANGED
```diff
@@ -1,4 +1,5 @@
 """pydantic models for OpenAI API protocol"""
+
 import time
 from typing import Dict, List, Optional, Union

@@ -6,6 +7,14 @@ from pydantic import BaseModel, Field
 from typing_extensions import Literal


+class ErrorResponse(BaseModel):
+    object: str = "error"
+    message: str
+    type: str
+    param: Optional[str] = None
+    code: int
+
+
 class LogProbs(BaseModel):
     text_offset: List[int] = Field(default_factory=list)
     token_logprobs: List[Optional[float]] = Field(default_factory=list)
@@ -178,4 +187,4 @@ class ChatCompletionStreamResponse(BaseModel):
     object: str = "chat.completion.chunk"
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
-    choices: List[ChatCompletionResponseStreamChoice]
+    choices: List[ChatCompletionResponseStreamChoice]
```