sglang 0.1.22__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. sglang/__init__.py +2 -2
  2. sglang/bench_serving.py +243 -25
  3. sglang/global_config.py +3 -2
  4. sglang/lang/interpreter.py +1 -0
  5. sglang/srt/hf_transformers_utils.py +13 -1
  6. sglang/srt/layers/logits_processor.py +4 -5
  7. sglang/srt/layers/radix_attention.py +38 -49
  8. sglang/srt/managers/controller/cuda_graph_runner.py +58 -16
  9. sglang/srt/managers/controller/infer_batch.py +51 -22
  10. sglang/srt/managers/controller/model_runner.py +58 -4
  11. sglang/srt/managers/controller/schedule_heuristic.py +8 -3
  12. sglang/srt/managers/controller/tp_worker.py +9 -11
  13. sglang/srt/memory_pool.py +13 -5
  14. sglang/srt/models/deepseek.py +430 -0
  15. sglang/srt/models/gpt_bigcode.py +282 -0
  16. sglang/srt/models/llama2.py +19 -10
  17. sglang/srt/server.py +26 -1
  18. sglang/srt/server_args.py +12 -6
  19. sglang/srt/utils.py +93 -1
  20. sglang/version.py +1 -0
  21. {sglang-0.1.22.dist-info → sglang-0.1.25.dist-info}/METADATA +10 -6
  22. {sglang-0.1.22.dist-info → sglang-0.1.25.dist-info}/RECORD +25 -36
  23. {sglang-0.1.22.dist-info → sglang-0.1.25.dist-info}/WHEEL +1 -1
  24. sglang/backend/__init__.py +0 -0
  25. sglang/backend/anthropic.py +0 -77
  26. sglang/backend/base_backend.py +0 -80
  27. sglang/backend/litellm.py +0 -90
  28. sglang/backend/openai.py +0 -438
  29. sglang/backend/runtime_endpoint.py +0 -283
  30. sglang/backend/vertexai.py +0 -149
  31. sglang/bench.py +0 -627
  32. sglang/srt/managers/controller/dp_worker.py +0 -113
  33. sglang/srt/openai_api/api_adapter.py +0 -432
  34. sglang/srt/openai_api/openai_api_adapter.py +0 -431
  35. sglang/srt/openai_api/openai_protocol.py +0 -207
  36. sglang/srt/openai_api_adapter.py +0 -411
  37. sglang/srt/openai_protocol.py +0 -207
  38. {sglang-0.1.22.dist-info → sglang-0.1.25.dist-info}/LICENSE +0 -0
  39. {sglang-0.1.22.dist-info → sglang-0.1.25.dist-info}/top_level.txt +0 -0
@@ -1,431 +0,0 @@
- """Conversion between OpenAI APIs and native SRT APIs"""
-
- import asyncio
- import json
- import os
- from http import HTTPStatus
-
- from fastapi import Request
- from fastapi.responses import JSONResponse, StreamingResponse
-
- from sglang.srt.conversation import (
-     Conversation,
-     SeparatorStyle,
-     chat_template_exists,
-     generate_chat_conv,
-     register_conv_template,
- )
- from sglang.srt.managers.io_struct import GenerateReqInput
- from sglang.srt.openai_protocol import (
-     ChatCompletionRequest,
-     ChatCompletionResponse,
-     ChatCompletionResponseChoice,
-     ChatCompletionResponseStreamChoice,
-     ChatCompletionStreamResponse,
-     ChatMessage,
-     CompletionRequest,
-     CompletionResponse,
-     CompletionResponseChoice,
-     CompletionResponseStreamChoice,
-     CompletionStreamResponse,
-     DeltaMessage,
-     ErrorResponse,
-     LogProbs,
-     UsageInfo,
- )
-
- chat_template_name = None
-
-
- def create_error_response(
-     message: str,
-     err_type: str = "BadRequestError",
-     status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
- ):
-     error = ErrorResponse(message=message, type=err_type, code=status_code.value)
-     return JSONResponse(content=error.model_dump(), status_code=error.code)
-
-
- def create_streaming_error_response(
-     message: str,
-     err_type: str = "BadRequestError",
-     status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
- ) -> str:
-     error = ErrorResponse(message=message, type=err_type, code=status_code.value)
-     json_str = json.dumps({"error": error.model_dump()})
-     return json_str
-
-
- def load_chat_template_for_openai_api(chat_template_arg):
-     global chat_template_name
-
-     print(f"Use chat template: {chat_template_arg}")
-     if not chat_template_exists(chat_template_arg):
-         if not os.path.exists(chat_template_arg):
-             raise RuntimeError(
-                 f"Chat template {chat_template_arg} is not a built-in template name "
-                 "or a valid chat template file path."
-             )
-         with open(chat_template_arg, "r") as filep:
-             template = json.load(filep)
-             try:
-                 sep_style = SeparatorStyle[template["sep_style"]]
-             except KeyError:
-                 raise ValueError(
-                     f"Unknown separator style: {template['sep_style']}"
-                 ) from None
-             register_conv_template(
-                 Conversation(
-                     name=template["name"],
-                     system_template=template["system"] + "\n{system_message}",
-                     system_message=template.get("system_message", ""),
-                     roles=(template["user"], template["assistant"]),
-                     sep_style=sep_style,
-                     sep=template.get("sep", "\n"),
-                     stop_str=template["stop_str"],
-                 ),
-                 override=True,
-             )
-             chat_template_name = template["name"]
-     else:
-         chat_template_name = chat_template_arg
-
-
- async def v1_completions(tokenizer_manager, raw_request: Request):
-     request_json = await raw_request.json()
-     request = CompletionRequest(**request_json)
-
-     adapted_request = GenerateReqInput(
-         text=request.prompt,
-         sampling_params={
-             "temperature": request.temperature,
-             "max_new_tokens": request.max_tokens,
-             "stop": request.stop,
-             "top_p": request.top_p,
-             "presence_penalty": request.presence_penalty,
-             "frequency_penalty": request.frequency_penalty,
-             "regex": request.regex,
-             "n": request.n,
-         },
-         return_logprob=request.logprobs is not None and request.logprobs > 0,
-         top_logprobs_num=request.logprobs if request.logprobs is not None else 0,
-         return_text_in_logprobs=True,
-         stream=request.stream,
-     )
-
-     if adapted_request.stream:
-
-         async def generate_stream_resp():
-             stream_buffer = ""
-             n_prev_token = 0
-             try:
-                 async for content in tokenizer_manager.generate_request(
-                     adapted_request, raw_request
-                 ):
-                     text = content["text"]
-                     prompt_tokens = content["meta_info"]["prompt_tokens"]
-                     completion_tokens = content["meta_info"]["completion_tokens"]
-
-                     if not stream_buffer:  # The first chunk
-                         if request.echo:
-                             # Prepend prompt in response text.
-                             text = request.prompt + text
-
-                     if request.logprobs:
-                         # The first chunk and echo is enabled.
-                         if not stream_buffer and request.echo:
-                             prefill_token_logprobs = content["meta_info"][
-                                 "prefill_token_logprobs"
-                             ]
-                             prefill_top_logprobs = content["meta_info"][
-                                 "prefill_top_logprobs"
-                             ]
-                         else:
-                             prefill_token_logprobs = None
-                             prefill_top_logprobs = None
-
-                         logprobs = to_openai_style_logprobs(
-                             prefill_token_logprobs=prefill_token_logprobs,
-                             prefill_top_logprobs=prefill_top_logprobs,
-                             decode_token_logprobs=content["meta_info"][
-                                 "decode_token_logprobs"
-                             ][n_prev_token:],
-                             decode_top_logprobs=content["meta_info"][
-                                 "decode_top_logprobs"
-                             ][n_prev_token:],
-                         )
-
-                         n_prev_token = len(
-                             content["meta_info"]["decode_token_logprobs"]
-                         )
-                     else:
-                         logprobs = None
-
-                     delta = text[len(stream_buffer) :]
-                     stream_buffer = stream_buffer + delta
-                     choice_data = CompletionResponseStreamChoice(
-                         index=0,
-                         text=delta,
-                         logprobs=logprobs,
-                         finish_reason=content["meta_info"]["finish_reason"],
-                     )
-                     chunk = CompletionStreamResponse(
-                         id=content["meta_info"]["id"],
-                         object="text_completion",
-                         choices=[choice_data],
-                         model=request.model,
-                         usage=UsageInfo(
-                             prompt_tokens=prompt_tokens,
-                             completion_tokens=completion_tokens,
-                             total_tokens=prompt_tokens + completion_tokens,
-                         ),
-                     )
-                     yield f"data: {chunk.model_dump_json()}\n\n"
-             except ValueError as e:
-                 error = create_streaming_error_response(str(e))
-                 yield f"data: {error}\n\n"
-             yield "data: [DONE]\n\n"
-
-         return StreamingResponse(
-             generate_stream_resp(),
-             media_type="text/event-stream",
-             background=tokenizer_manager.create_abort_task(adapted_request),
-         )
-
-     # Non-streaming response.
-     try:
-         ret = await tokenizer_manager.generate_request(
-             adapted_request, raw_request
-         ).__anext__()
-     except ValueError as e:
-         return create_error_response(str(e))
-
-     if not isinstance(ret, list):
-         ret = [ret]
-     choices = []
-
-     for idx, ret_item in enumerate(ret):
-         text = ret_item["text"]
-
-         if request.echo:
-             text = request.prompt + text
-
-         if request.logprobs:
-             if request.echo:
-                 prefill_token_logprobs = ret_item["meta_info"]["prefill_token_logprobs"]
-                 prefill_top_logprobs = ret_item["meta_info"]["prefill_top_logprobs"]
-             else:
-                 prefill_token_logprobs = None
-                 prefill_top_logprobs = None
-
-             logprobs = to_openai_style_logprobs(
-                 prefill_token_logprobs=prefill_token_logprobs,
-                 prefill_top_logprobs=prefill_top_logprobs,
-                 decode_token_logprobs=ret_item["meta_info"]["decode_token_logprobs"],
-                 decode_top_logprobs=ret_item["meta_info"]["decode_top_logprobs"],
-             )
-         else:
-             logprobs = None
-
-         choice_data = CompletionResponseChoice(
-             index=idx,
-             text=text,
-             logprobs=logprobs,
-             finish_reason=ret_item["meta_info"]["finish_reason"],
-         )
-
-         choices.append(choice_data)
-
-     response = CompletionResponse(
-         id=ret[0]["meta_info"]["id"],
-         model=request.model,
-         choices=choices,
-         usage=UsageInfo(
-             prompt_tokens=ret[0]["meta_info"]["prompt_tokens"],
-             completion_tokens=sum(
-                 item["meta_info"]["completion_tokens"] for item in ret
-             ),
-             total_tokens=ret[0]["meta_info"]["prompt_tokens"]
-             + sum(item["meta_info"]["completion_tokens"] for item in ret),
-         ),
-     )
-
-     return response
-
-
- async def v1_chat_completions(tokenizer_manager, raw_request: Request):
-     request_json = await raw_request.json()
-     request = ChatCompletionRequest(**request_json)
-
-     # Prep the data needed for the underlying GenerateReqInput:
-     #  - prompt: The full prompt string.
-     #  - stop: Custom stop tokens.
-     #  - image_data: None or a list of image strings (URLs or base64 strings).
-     #    None skips any image processing in GenerateReqInput.
-     if not isinstance(request.messages, str):
-         # Apply chat template and its stop strings.
-         if chat_template_name is None:
-             prompt = tokenizer_manager.tokenizer.apply_chat_template(
-                 request.messages, tokenize=False, add_generation_prompt=True
-             )
-             stop = request.stop
-             image_data = None
-         else:
-             conv = generate_chat_conv(request, chat_template_name)
-             prompt = conv.get_prompt()
-             image_data = conv.image_data
-             stop = conv.stop_str or []
-             if request.stop:
-                 if isinstance(request.stop, str):
-                     stop.append(request.stop)
-                 else:
-                     stop.extend(request.stop)
-     else:
-         # Use the raw prompt and stop strings if the messages is already a string.
-         prompt = request.messages
-         stop = request.stop
-         image_data = None
-
-     adapted_request = GenerateReqInput(
-         text=prompt,
-         image_data=image_data,
-         sampling_params={
-             "temperature": request.temperature,
-             "max_new_tokens": request.max_tokens,
-             "stop": stop,
-             "top_p": request.top_p,
-             "presence_penalty": request.presence_penalty,
-             "frequency_penalty": request.frequency_penalty,
-             "regex": request.regex,
-             "n": request.n,
-         },
-         stream=request.stream,
-     )
-
-     if adapted_request.stream:
-
-         async def generate_stream_resp():
-             is_first = True
-
-             stream_buffer = ""
-             try:
-                 async for content in tokenizer_manager.generate_request(
-                     adapted_request, raw_request
-                 ):
-                     if is_first:
-                         # First chunk with role
-                         is_first = False
-                         choice_data = ChatCompletionResponseStreamChoice(
-                             index=0,
-                             delta=DeltaMessage(role="assistant"),
-                             finish_reason=content["meta_info"]["finish_reason"],
-                         )
-                         chunk = ChatCompletionStreamResponse(
-                             id=content["meta_info"]["id"],
-                             choices=[choice_data],
-                             model=request.model,
-                         )
-                         yield f"data: {chunk.model_dump_json()}\n\n"
-
-                     text = content["text"]
-                     delta = text[len(stream_buffer) :]
-                     stream_buffer = stream_buffer + delta
-                     choice_data = ChatCompletionResponseStreamChoice(
-                         index=0,
-                         delta=DeltaMessage(content=delta),
-                         finish_reason=content["meta_info"]["finish_reason"],
-                     )
-                     chunk = ChatCompletionStreamResponse(
-                         id=content["meta_info"]["id"],
-                         choices=[choice_data],
-                         model=request.model,
-                     )
-                     yield f"data: {chunk.model_dump_json()}\n\n"
-             except ValueError as e:
-                 error = create_streaming_error_response(str(e))
-                 yield f"data: {error}\n\n"
-             yield "data: [DONE]\n\n"
-
-         return StreamingResponse(
-             generate_stream_resp(),
-             media_type="text/event-stream",
-             background=tokenizer_manager.create_abort_task(adapted_request),
-         )
-
-     # Non-streaming response.
-     try:
-         ret = await tokenizer_manager.generate_request(
-             adapted_request, raw_request
-         ).__anext__()
-     except ValueError as e:
-         return create_error_response(str(e))
-
-     if not isinstance(ret, list):
-         ret = [ret]
-     choices = []
-     total_prompt_tokens = 0
-     total_completion_tokens = 0
-
-     for idx, ret_item in enumerate(ret):
-         prompt_tokens = ret_item["meta_info"]["prompt_tokens"]
-         completion_tokens = ret_item["meta_info"]["completion_tokens"]
-
-         choice_data = ChatCompletionResponseChoice(
-             index=idx,
-             message=ChatMessage(role="assistant", content=ret_item["text"]),
-             finish_reason=ret_item["meta_info"]["finish_reason"],
-         )
-
-         choices.append(choice_data)
-         total_prompt_tokens = prompt_tokens
-         total_completion_tokens += completion_tokens
-
-     response = ChatCompletionResponse(
-         id=ret[0]["meta_info"]["id"],
-         model=request.model,
-         choices=choices,
-         usage=UsageInfo(
-             prompt_tokens=total_prompt_tokens,
-             completion_tokens=total_completion_tokens,
-             total_tokens=total_prompt_tokens + total_completion_tokens,
-         ),
-     )
-
-     return response
-
-
- def to_openai_style_logprobs(
-     prefill_token_logprobs=None,
-     decode_token_logprobs=None,
-     prefill_top_logprobs=None,
-     decode_top_logprobs=None,
- ):
-     ret_logprobs = LogProbs()
-
-     def append_token_logprobs(token_logprobs):
-         for logprob, _, token_text in token_logprobs:
-             ret_logprobs.tokens.append(token_text)
-             ret_logprobs.token_logprobs.append(logprob)
-
-             # Not supported yet
-             ret_logprobs.text_offset.append(-1)
-
-     def append_top_logprobs(top_logprobs):
-         for tokens in top_logprobs:
-             if tokens is not None:
-                 ret_logprobs.top_logprobs.append(
-                     {token[2]: token[0] for token in tokens}
-                 )
-             else:
-                 ret_logprobs.top_logprobs.append(None)
-
-     if prefill_token_logprobs is not None:
-         append_token_logprobs(prefill_token_logprobs)
-     if decode_token_logprobs is not None:
-         append_token_logprobs(decode_token_logprobs)
-     if prefill_top_logprobs is not None:
-         append_top_logprobs(prefill_top_logprobs)
-     if decode_top_logprobs is not None:
-         append_top_logprobs(decode_top_logprobs)
-
-     return ret_logprobs
@@ -1,207 +0,0 @@
- """Pydantic models for OpenAI API protocol"""
-
- import time
- from typing import Dict, List, Optional, Union
-
- from pydantic import BaseModel, Field
- from typing_extensions import Literal
-
-
- class ModelCard(BaseModel):
-     """Model cards."""
-
-     id: str
-     object: str = "model"
-     created: int = Field(default_factory=lambda: int(time.time()))
-     owned_by: str = "sglang"
-     root: Optional[str] = None
-
-
- class ModelList(BaseModel):
-     """Model list consists of model cards."""
-
-     object: str = "list"
-     data: List[ModelCard] = []
-
-
- class ErrorResponse(BaseModel):
-     object: str = "error"
-     message: str
-     type: str
-     param: Optional[str] = None
-     code: int
-
-
- class LogProbs(BaseModel):
-     text_offset: List[int] = Field(default_factory=list)
-     token_logprobs: List[Optional[float]] = Field(default_factory=list)
-     tokens: List[str] = Field(default_factory=list)
-     top_logprobs: List[Optional[Dict[str, float]]] = Field(default_factory=list)
-
-
- class UsageInfo(BaseModel):
-     prompt_tokens: int = 0
-     total_tokens: int = 0
-     completion_tokens: Optional[int] = 0
-
-
- class CompletionRequest(BaseModel):
-     # Ordered by official OpenAI API documentation
-     # https://platform.openai.com/docs/api-reference/completions/create
-     model: str
-     prompt: Union[List[int], List[List[int]], str, List[str]]
-     best_of: Optional[int] = None
-     echo: Optional[bool] = False
-     frequency_penalty: Optional[float] = 0.0
-     logit_bias: Optional[Dict[str, float]] = None
-     logprobs: Optional[int] = None
-     max_tokens: Optional[int] = 16
-     n: int = 1
-     presence_penalty: Optional[float] = 0.0
-     seed: Optional[int] = None
-     stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
-     stream: Optional[bool] = False
-     suffix: Optional[str] = None
-     temperature: Optional[float] = 1.0
-     top_p: Optional[float] = 1.0
-     user: Optional[str] = None
-
-     # Extra parameters for SRT backend only and will be ignored by OpenAI models.
-     regex: Optional[str] = None
-
-
- class CompletionResponseChoice(BaseModel):
-     index: int
-     text: str
-     logprobs: Optional[LogProbs] = None
-     finish_reason: Optional[str] = None
-
-
- class CompletionResponse(BaseModel):
-     id: str
-     object: str = "text_completion"
-     created: int = Field(default_factory=lambda: int(time.time()))
-     model: str
-     choices: List[CompletionResponseChoice]
-     usage: UsageInfo
-
-
- class CompletionResponseStreamChoice(BaseModel):
-     index: int
-     text: str
-     logprobs: Optional[LogProbs] = None
-     finish_reason: Optional[str] = None
-
-
- class CompletionStreamResponse(BaseModel):
-     id: str
-     object: str = "text_completion"
-     created: int = Field(default_factory=lambda: int(time.time()))
-     model: str
-     choices: List[CompletionResponseStreamChoice]
-     usage: UsageInfo
-
-
- class ChatCompletionMessageGenericParam(BaseModel):
-     role: Literal["system", "assistant"]
-     content: str
-
-
- class ChatCompletionMessageContentTextPart(BaseModel):
-     type: Literal["text"]
-     text: str
-
-
- class ChatCompletionMessageContentImageURL(BaseModel):
-     url: str
-     detail: Optional[Literal["auto", "low", "high"]] = "auto"
-
-
- class ChatCompletionMessageContentImagePart(BaseModel):
-     type: Literal["image_url"]
-     image_url: ChatCompletionMessageContentImageURL
-
-
- ChatCompletionMessageContentPart = Union[
-     ChatCompletionMessageContentTextPart, ChatCompletionMessageContentImagePart
- ]
-
-
- class ChatCompletionMessageUserParam(BaseModel):
-     role: Literal["user"]
-     content: Union[str, List[ChatCompletionMessageContentPart]]
-
-
- ChatCompletionMessageParam = Union[
-     ChatCompletionMessageGenericParam, ChatCompletionMessageUserParam
- ]
-
-
- class ResponseFormat(BaseModel):
-     # type must be "json_object" or "text"
-     type: Literal["text", "json_object"]
-
-
- class ChatCompletionRequest(BaseModel):
-     # Ordered by official OpenAI API documentation
-     # https://platform.openai.com/docs/api-reference/chat/create
-     messages: List[ChatCompletionMessageParam]
-     model: str
-     frequency_penalty: Optional[float] = 0.0
-     logit_bias: Optional[Dict[str, float]] = None
-     logprobs: Optional[bool] = False
-     top_logprobs: Optional[int] = None
-     max_tokens: Optional[int] = 16
-     n: Optional[int] = 1
-     presence_penalty: Optional[float] = 0.0
-     response_format: Optional[ResponseFormat] = None
-     seed: Optional[int] = None
-     stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
-     stream: Optional[bool] = False
-     temperature: Optional[float] = 0.7
-     top_p: Optional[float] = 1.0
-     user: Optional[str] = None
-
-     # Extra parameters for SRT backend only and will be ignored by OpenAI models.
-     regex: Optional[str] = None
-
-
- class ChatMessage(BaseModel):
-     role: Optional[str] = None
-     content: Optional[str] = None
-
-
- class ChatCompletionResponseChoice(BaseModel):
-     index: int
-     message: ChatMessage
-     logprobs: Optional[LogProbs] = None
-     finish_reason: Optional[str] = None
-
-
- class ChatCompletionResponse(BaseModel):
-     id: str
-     object: str = "chat.completion"
-     created: int = Field(default_factory=lambda: int(time.time()))
-     model: str
-     choices: List[ChatCompletionResponseChoice]
-     usage: UsageInfo
-
-
- class DeltaMessage(BaseModel):
-     role: Optional[str] = None
-     content: Optional[str] = None
-
-
- class ChatCompletionResponseStreamChoice(BaseModel):
-     index: int
-     delta: DeltaMessage
-     logprobs: Optional[LogProbs] = None
-     finish_reason: Optional[str] = None
-
-
- class ChatCompletionStreamResponse(BaseModel):
-     id: str
-     object: str = "chat.completion.chunk"
-     created: int = Field(default_factory=lambda: int(time.time()))
-     model: str
-     choices: List[ChatCompletionResponseStreamChoice]