sglang 0.4.10.post2__py3-none-any.whl → 0.5.0rc0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the registry.
- sglang/bench_one_batch.py +113 -17
- sglang/srt/configs/model_config.py +35 -0
- sglang/srt/conversation.py +9 -5
- sglang/srt/disaggregation/base/conn.py +5 -2
- sglang/srt/disaggregation/decode.py +6 -1
- sglang/srt/disaggregation/decode_schedule_batch_mixin.py +3 -0
- sglang/srt/disaggregation/mooncake/conn.py +243 -135
- sglang/srt/disaggregation/prefill.py +2 -0
- sglang/srt/distributed/parallel_state.py +11 -9
- sglang/srt/entrypoints/context.py +244 -0
- sglang/srt/entrypoints/engine.py +4 -3
- sglang/srt/entrypoints/harmony_utils.py +370 -0
- sglang/srt/entrypoints/http_server.py +71 -0
- sglang/srt/entrypoints/openai/protocol.py +227 -1
- sglang/srt/entrypoints/openai/serving_chat.py +278 -42
- sglang/srt/entrypoints/openai/serving_responses.py +1273 -0
- sglang/srt/entrypoints/openai/tool_server.py +174 -0
- sglang/srt/entrypoints/tool.py +87 -0
- sglang/srt/eplb/expert_location.py +5 -1
- sglang/srt/function_call/harmony_tool_parser.py +130 -0
- sglang/srt/hf_transformers_utils.py +30 -3
- sglang/srt/jinja_template_utils.py +8 -1
- sglang/srt/layers/attention/aiter_backend.py +5 -8
- sglang/srt/layers/attention/dual_chunk_flashattention_backend.py +1700 -0
- sglang/srt/layers/attention/triton_backend.py +85 -14
- sglang/srt/layers/attention/triton_ops/decode_attention.py +17 -0
- sglang/srt/layers/attention/triton_ops/extend_attention.py +143 -98
- sglang/srt/layers/attention/trtllm_mha_backend.py +332 -0
- sglang/srt/layers/attention/vision.py +13 -5
- sglang/srt/layers/communicator.py +21 -4
- sglang/srt/layers/dp_attention.py +12 -0
- sglang/srt/layers/linear.py +2 -7
- sglang/srt/layers/moe/cutlass_moe.py +20 -6
- sglang/srt/layers/moe/ep_moe/layer.py +77 -73
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +101 -12
- sglang/srt/layers/moe/fused_moe_triton/layer.py +416 -35
- sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +188 -3
- sglang/srt/layers/moe/topk.py +12 -3
- sglang/srt/layers/moe/utils.py +16 -0
- sglang/srt/layers/quantization/__init__.py +22 -0
- sglang/srt/layers/quantization/fp4.py +557 -0
- sglang/srt/layers/quantization/fp8.py +3 -6
- sglang/srt/layers/quantization/fp8_utils.py +29 -0
- sglang/srt/layers/quantization/modelopt_quant.py +259 -64
- sglang/srt/layers/quantization/mxfp4.py +651 -0
- sglang/srt/layers/quantization/mxfp4_tensor.py +133 -0
- sglang/srt/layers/quantization/quark/__init__.py +0 -0
- sglang/srt/layers/quantization/quark/schemes/__init__.py +6 -0
- sglang/srt/layers/quantization/quark/schemes/quark_scheme.py +55 -0
- sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py +118 -0
- sglang/srt/layers/quantization/quark/utils.py +107 -0
- sglang/srt/layers/quantization/unquant.py +60 -6
- sglang/srt/layers/quantization/w4afp8.py +1 -1
- sglang/srt/layers/rotary_embedding.py +225 -1
- sglang/srt/layers/utils.py +9 -0
- sglang/srt/layers/vocab_parallel_embedding.py +8 -3
- sglang/srt/lora/lora_manager.py +70 -14
- sglang/srt/lora/lora_registry.py +3 -2
- sglang/srt/lora/mem_pool.py +43 -5
- sglang/srt/managers/cache_controller.py +55 -30
- sglang/srt/managers/detokenizer_manager.py +1 -1
- sglang/srt/managers/io_struct.py +15 -3
- sglang/srt/managers/mm_utils.py +5 -11
- sglang/srt/managers/schedule_batch.py +28 -7
- sglang/srt/managers/scheduler.py +26 -12
- sglang/srt/managers/scheduler_output_processor_mixin.py +1 -2
- sglang/srt/managers/scheduler_recv_skipper.py +37 -0
- sglang/srt/managers/scheduler_update_weights_mixin.py +6 -0
- sglang/srt/managers/template_manager.py +35 -1
- sglang/srt/managers/tokenizer_manager.py +24 -6
- sglang/srt/managers/tp_worker.py +3 -0
- sglang/srt/managers/tp_worker_overlap_thread.py +3 -0
- sglang/srt/mem_cache/hiradix_cache.py +53 -5
- sglang/srt/mem_cache/memory_pool_host.py +1 -1
- sglang/srt/mem_cache/multimodal_cache.py +33 -13
- sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py +2 -2
- sglang/srt/model_executor/cuda_graph_runner.py +7 -6
- sglang/srt/model_executor/forward_batch_info.py +35 -14
- sglang/srt/model_executor/model_runner.py +19 -2
- sglang/srt/model_loader/weight_utils.py +10 -0
- sglang/srt/models/bailing_moe.py +425 -0
- sglang/srt/models/deepseek_v2.py +72 -33
- sglang/srt/models/ernie4.py +426 -0
- sglang/srt/models/ernie4_eagle.py +203 -0
- sglang/srt/models/gemma3n_mm.py +39 -0
- sglang/srt/models/glm4_moe.py +24 -12
- sglang/srt/models/gpt_oss.py +1134 -0
- sglang/srt/models/qwen2.py +6 -0
- sglang/srt/models/qwen2_moe.py +6 -0
- sglang/srt/models/qwen3_moe.py +32 -6
- sglang/srt/models/step3_vl.py +9 -0
- sglang/srt/models/transformers.py +2 -5
- sglang/srt/multimodal/processors/step3_vl.py +3 -1
- sglang/srt/reasoning_parser.py +18 -39
- sglang/srt/server_args.py +142 -7
- sglang/srt/two_batch_overlap.py +157 -5
- sglang/srt/utils.py +38 -2
- sglang/test/runners.py +2 -2
- sglang/test/test_utils.py +1 -1
- sglang/version.py +1 -1
- {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc0.dist-info}/METADATA +16 -14
- {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc0.dist-info}/RECORD +105 -84
- {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc0.dist-info}/WHEEL +0 -0
- {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc0.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc0.dist-info}/top_level.txt +0 -0
sglang/srt/entrypoints/http_server.py

@@ -32,6 +32,7 @@ from typing import AsyncIterator, Callable, Dict, Optional
 setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
 
 from contextlib import asynccontextmanager
+from typing import AsyncGenerator
 
 import numpy as np
 import orjson
@@ -56,6 +57,7 @@ from sglang.srt.entrypoints.openai.protocol import (
     ErrorResponse,
     ModelCard,
     ModelList,
+    ResponsesRequest,
     ScoringRequest,
     V1RerankReqInput,
 )
@@ -147,6 +149,37 @@ async def lifespan(fast_api_app: FastAPI):
     )
 
     server_args: ServerArgs = fast_api_app.server_args
+
+    tool_server = None
+    if server_args.tool_server == "demo":
+        from sglang.srt.entrypoints.openai.tool_server import DemoToolServer
+
+        tool_server = DemoToolServer()
+    elif server_args.tool_server:
+        from sglang.srt.entrypoints.openai.tool_server import MCPToolServer
+
+        tool_server = MCPToolServer()
+        await tool_server.add_tool_server(server_args.tool_server)
+
+    try:
+        from sglang.srt.entrypoints.openai.serving_responses import (
+            OpenAIServingResponses,
+        )
+
+        fast_api_app.state.openai_serving_responses = OpenAIServingResponses(
+            _global_state.tokenizer_manager,
+            _global_state.template_manager,
+            enable_prompt_tokens_details=True,
+            enable_force_include_usage=True,
+            tool_server=tool_server,
+        )
+    except Exception as e:
+        # print stack trace
+        import traceback
+
+        traceback.print_exc()
+        logger.warning(f"Can not initialize OpenAIServingResponses, error: {e}")
+
     if server_args.warmups is not None:
         await execute_warmups(
             server_args.disaggregation_mode,
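The new lifespan wiring above picks a tool backend from `server_args.tool_server` ("demo" selects the built-in `DemoToolServer`; any other non-empty value is handed to `MCPToolServer.add_tool_server` as an MCP tool-server address) and then constructs `OpenAIServingResponses`, logging and swallowing failures so the rest of the server still starts. A minimal sketch of exercising the same calls outside the FastAPI lifespan, assuming the no-argument constructors shown in the hunk and using a made-up MCP URL:

```python
# Hedged sketch: reuses only the calls visible in the diff above; the MCP URL
# is a placeholder, and DemoToolServer/MCPToolServer are assumed to take no
# constructor arguments, as in the lifespan() hunk.
import asyncio
from typing import Optional

from sglang.srt.entrypoints.openai.tool_server import DemoToolServer, MCPToolServer


async def build_tool_server(spec: Optional[str]):
    if spec == "demo":
        return DemoToolServer()  # built-in demo tools
    elif spec:
        server = MCPToolServer()  # one or more remote MCP tool servers
        await server.add_tool_server(spec)  # e.g. "http://localhost:8000/mcp"
        return server
    return None


tool_server = asyncio.run(build_tool_server("demo"))
```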
@@ -843,6 +876,42 @@ async def v1_score_request(request: ScoringRequest, raw_request: Request):
     )
 
 
+@app.post("/v1/responses", dependencies=[Depends(validate_json_request)])
+async def v1_responses_request(request: dict, raw_request: Request):
+    """Endpoint for the responses API with reasoning support."""
+
+    request_obj = ResponsesRequest(**request)
+    result = await raw_request.app.state.openai_serving_responses.create_responses(
+        request_obj, raw_request
+    )
+
+    # Handle streaming responses
+    if isinstance(result, AsyncGenerator):
+        return StreamingResponse(
+            result,
+            media_type="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+
+    return result
+
+
+@app.get("/v1/responses/{response_id}")
+async def v1_retrieve_responses(response_id: str, raw_request: Request):
+    """Retrieve a response by ID."""
+    return await raw_request.app.state.openai_serving_responses.retrieve_responses(
+        response_id
+    )
+
+
+@app.post("/v1/responses/{response_id}/cancel")
+async def v1_cancel_responses(response_id: str, raw_request: Request):
+    """Cancel a background response."""
+    return await raw_request.app.state.openai_serving_responses.cancel_responses(
+        response_id
+    )
+
+
 @app.api_route(
     "/v1/rerank", methods=["POST", "PUT"], dependencies=[Depends(validate_json_request)]
 )
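The three routes above mirror the OpenAI Responses API surface (create, retrieve, cancel). A hedged usage sketch with the official `openai` Python client pointed at a locally running SGLang server; the host, port, and model name are placeholders, not defaults asserted by this diff:

```python
# Hedged sketch: endpoint paths come from the diff above; base URL and model
# name are illustrative only.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")

resp = client.responses.create(
    model="openai/gpt-oss-20b",           # placeholder model name
    input="Explain KV-cache reuse in one paragraph.",
    reasoning={"effort": "low"},          # maps to ResponseReasoningParam.effort
)
print(resp.id, resp.status)

# GET /v1/responses/{response_id}
fetched = client.responses.retrieve(resp.id)

# POST /v1/responses/{response_id}/cancel (useful for background responses)
client.responses.cancel(resp.id)
```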
@@ -1103,6 +1172,8 @@ def _wait_and_warmup(
         pipe_finish_writer,
     ):
        return
+    else:
+        _global_state.tokenizer_manager.server_status = ServerStatus.Up
 
     logger.info("The server is fired up and ready to roll!")
 
sglang/srt/entrypoints/openai/protocol.py

@@ -14,9 +14,18 @@
 """Pydantic models for OpenAI API protocol"""
 
 import time
+import uuid
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, TypeAlias, Union
 
+from openai.types.responses import (
+    ResponseFunctionToolCall,
+    ResponseInputItemParam,
+    ResponseOutputItem,
+    ResponseReasoningItem,
+)
+from openai.types.responses.response import ToolChoice
+from openai.types.responses.tool import Tool
 from pydantic import (
     BaseModel,
     Field,
@@ -84,6 +93,7 @@ class UsageInfo(BaseModel):
     completion_tokens: Optional[int] = 0
     # only used to return cached tokens when --enable-cache-report is set
     prompt_tokens_details: Optional[Dict[str, int]] = None
+    reasoning_tokens: Optional[int] = 0
 
 
 class StreamOptions(BaseModel):
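With the new `reasoning_tokens` field on `UsageInfo`, a usage block can now report how many output tokens were spent on reasoning. A hedged sketch; `prompt_tokens` and `total_tokens` are standard OpenAI usage fields assumed to exist on `UsageInfo`, and the numbers are illustrative:

```python
# Hedged sketch: only reasoning_tokens is confirmed new by this diff;
# the other fields and values are assumptions for illustration.
from sglang.srt.entrypoints.openai.protocol import UsageInfo

usage = UsageInfo(
    prompt_tokens=128,
    completion_tokens=512,
    total_tokens=640,
    reasoning_tokens=200,                        # new in 0.5.0rc0
    prompt_tokens_details={"cached_tokens": 64},
)
```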
@@ -428,6 +438,13 @@ class ChatCompletionRequest(BaseModel):
         default="auto", examples=["none"]
     )  # noqa
     return_hidden_states: bool = False
+    reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+        default="medium",
+        description="Constrains effort on reasoning for reasoning models. "
+        "'low' is the least effort, 'high' is the most effort. Reducing reasoning effort can "
+        "result in faster responses and fewer tokens used on reasoning in a response. "
+        "Currently only supported for OpenAI models.",
+    )
 
     @model_validator(mode="before")
     @classmethod
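The new `reasoning_effort` field rides along on the standard chat-completions payload. A hedged request sketch; the host, port, and model name are placeholders, and `requests` is just one way to POST the body:

```python
# Hedged sketch: reasoning_effort is the field added above; everything else
# (URL, model name) is illustrative.
import requests

payload = {
    "model": "openai/gpt-oss-20b",
    "messages": [{"role": "user", "content": "Prove that sqrt(2) is irrational."}],
    "reasoning_effort": "high",  # "low" | "medium" (default) | "high"
}
r = requests.post("http://127.0.0.1:30000/v1/chat/completions", json=payload)
print(r.json()["choices"][0]["message"]["content"])
```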
@@ -619,6 +636,196 @@ OpenAIServingRequest = Union[
 ]
 
 
+# Response API protocol definitions
+class ResponseReasoningParam(BaseModel):
+    """Reasoning parameters for responses."""
+
+    effort: Optional[Literal["low", "medium", "high"]] = Field(
+        default="medium",
+        description="Constrains effort on reasoning for reasoning models.",
+    )
+
+
+class ResponseTool(BaseModel):
+    """Tool definition for responses."""
+
+    type: Literal["web_search_preview", "code_interpreter"] = Field(
+        description="Type of tool to enable"
+    )
+
+
+ResponseInputOutputItem: TypeAlias = Union[
+    ResponseInputItemParam,
+    "ResponseReasoningItem",
+    ResponseFunctionToolCall,
+]
+
+
+class ResponsesRequest(BaseModel):
+    """Request body for v1/responses endpoint."""
+
+    # Core OpenAI API fields (ordered by official documentation)
+    background: Optional[bool] = False
+    include: Optional[
+        List[
+            Literal[
+                "code_interpreter_call.outputs",
+                "computer_call_output.output.image_url",
+                "file_search_call.results",
+                "message.input_image.image_url",
+                "message.output_text.logprobs",
+                "reasoning.encrypted_content",
+            ]
+        ]
+    ] = None
+    input: Union[str, List[ResponseInputOutputItem]]
+    instructions: Optional[str] = None
+    max_output_tokens: Optional[int] = None
+    max_tool_calls: Optional[int] = None
+    metadata: Optional[Dict[str, Any]] = None
+    model: Optional[str] = None  # Made optional to match vLLM
+    parallel_tool_calls: Optional[bool] = True
+    previous_response_id: Optional[str] = None
+    reasoning: Optional[ResponseReasoningParam] = None
+    service_tier: Literal["auto", "default", "flex", "scale", "priority"] = "auto"
+    store: Optional[bool] = True
+    stream: Optional[bool] = False
+    temperature: Optional[float] = None
+    tool_choice: Literal["auto", "required", "none"] = "auto"
+    tools: List[ResponseTool] = Field(default_factory=list)
+    top_logprobs: Optional[int] = 0
+    top_p: Optional[float] = None
+    truncation: Optional[Literal["auto", "disabled"]] = "disabled"
+    user: Optional[str] = None
+
+    # Extra SGLang parameters
+    request_id: str = Field(
+        default_factory=lambda: f"resp_{uuid.uuid4().hex}",
+        description="The request_id related to this request. If the caller does not set it, a random uuid will be generated.",
+    )
+    priority: int = Field(default=0, description="Request priority")
+
+    # SGLang-specific sampling parameters
+    frequency_penalty: float = 0.0
+    presence_penalty: float = 0.0
+    stop: Optional[Union[str, List[str]]] = None
+    top_k: int = -1
+    min_p: float = 0.0
+    repetition_penalty: float = 1.0
+
+    # Default sampling parameters
+    _DEFAULT_SAMPLING_PARAMS = {
+        "temperature": 0.7,
+        "top_p": 1.0,
+        "top_k": -1,
+        "min_p": 0.0,
+        "repetition_penalty": 1.0,
+    }
+
+    def to_sampling_params(
+        self, default_max_tokens: int, default_params: Optional[Dict] = None
+    ) -> Dict[str, Any]:
+        """Convert to sampling parameters for generation."""
+        if default_params is None:
+            default_params = {}
+
+        # Use max_output_tokens if available, otherwise use max_tokens for backwards compatibility
+        if self.max_output_tokens is not None:
+            max_tokens = min(self.max_output_tokens, default_max_tokens)
+        else:
+            max_tokens = default_max_tokens
+
+        # Avoid exceed the context length by minus 1 token
+        max_tokens -= 1
+
+        # Get parameters with defaults
+        temperature = self.temperature
+        if temperature is None:
+            temperature = default_params.get(
+                "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"]
+            )
+
+        top_p = self.top_p
+        if top_p is None:
+            top_p = default_params.get("top_p", self._DEFAULT_SAMPLING_PARAMS["top_p"])
+
+        params = {
+            "max_new_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "frequency_penalty": self.frequency_penalty,
+            "presence_penalty": self.presence_penalty,
+            "stop": self.stop,
+            "top_k": self.top_k,
+            "min_p": self.min_p,
+            "repetition_penalty": self.repetition_penalty,
+        }
+
+        # Apply any additional default parameters
+        for key, value in default_params.items():
+            if key not in params or params[key] is None:
+                params[key] = value
+
+        return params
+
+
+class PromptTokenUsageInfo(BaseModel):
+    """Prompt token usage details."""
+
+    cached_tokens: int = 0
+
+
+class ResponsesResponse(BaseModel):
+    """Response body for v1/responses endpoint."""
+
+    id: str = Field(default_factory=lambda: f"resp_{time.time()}")
+    object: Literal["response"] = "response"
+    created_at: int = Field(default_factory=lambda: int(time.time()))
+    model: str
+
+    output: List[
+        Union[ResponseOutputItem, ResponseReasoningItem, ResponseFunctionToolCall]
+    ] = Field(default_factory=list)
+    status: Literal["queued", "in_progress", "completed", "failed", "cancelled"]
+    usage: Optional[UsageInfo] = None
+    parallel_tool_calls: bool = True
+    tool_choice: str = "auto"
+    tools: List[ResponseTool] = Field(default_factory=list)
+
+    @classmethod
+    def from_request(
+        cls,
+        request: ResponsesRequest,
+        sampling_params: Any,
+        model_name: str,
+        created_time: int,
+        output: List[
+            Union[ResponseOutputItem, ResponseReasoningItem, ResponseFunctionToolCall]
+        ],
+        status: str,
+        usage: Optional[UsageInfo],
+    ) -> "ResponsesResponse":
+        """Create a response from a request."""
+        return cls(
+            id=request.request_id,
+            created_at=created_time,
+            model=model_name,
+            output=output,
+            status=status,
+            usage=usage,
+            parallel_tool_calls=request.parallel_tool_calls or True,
+            tool_choice=request.tool_choice,
+            tools=request.tools,
+        )
+
+
+class RequestResponseMetadata(BaseModel):
+    """Metadata for request/response tracking."""
+
+    request_id: str
+    final_usage_info: Optional[UsageInfo] = None
+
+
 @dataclass
 class MessageProcessingResult:
     """Result of processing chat messages and applying templates.
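`ResponsesRequest.to_sampling_params()` above maps Responses-API fields onto SGLang's internal sampling parameters: explicit request fields win, then `default_params`, then `_DEFAULT_SAMPLING_PARAMS`, and the token budget is `min(max_output_tokens, default_max_tokens) - 1`. A small sketch of that resolution, based solely on the definition in the hunk above (the prompt text and budgets are arbitrary):

```python
# Hedged sketch: exercises only the model and method defined in the diff above.
from sglang.srt.entrypoints.openai.protocol import ResponsesRequest

req = ResponsesRequest(
    input="Summarize the 0.5.0rc0 changes.",
    max_output_tokens=256,
    temperature=None,  # falls back to _DEFAULT_SAMPLING_PARAMS["temperature"]
)
params = req.to_sampling_params(default_max_tokens=4096)
assert params["max_new_tokens"] == 255   # min(256, 4096) - 1
assert params["temperature"] == 0.7
assert params["top_k"] == -1             # explicit SGLang-specific default
```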
@@ -645,3 +852,22 @@ class MessageProcessingResult:
     modalities: List[str]
     stop: List[str]
     tool_call_constraint: Optional[Any] = None
+
+
+class ResponseReasoningTextContent(BaseModel):
+    text: str
+    type: Literal["reasoning_text"] = "reasoning_text"
+
+
+class ResponseReasoningItem(BaseModel):
+    id: str
+    content: list[ResponseReasoningTextContent] = Field(default_factory=list)
+    summary: list = Field(default_factory=list)
+    type: Literal["reasoning"] = "reasoning"
+    encrypted_content: Optional[str] = None
+    status: Optional[Literal["in_progress", "completed", "incomplete"]]
+
+
+ResponseInputOutputItem: TypeAlias = Union[
+    ResponseInputItemParam, "ResponseReasoningItem", ResponseFunctionToolCall
+]
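These late-defined models back the reasoning items that `/v1/responses` can emit; note that `status` is declared without a default, so it must be supplied explicitly even though `None` is accepted. A minimal construction sketch using only the classes defined above (the id and text values are placeholders):

```python
# Hedged sketch: field names come from the diff above; values are illustrative.
from sglang.srt.entrypoints.openai.protocol import (
    ResponseReasoningItem,
    ResponseReasoningTextContent,
)

item = ResponseReasoningItem(
    id="rs_0",
    content=[ResponseReasoningTextContent(text="Considering edge cases first...")],
    status="completed",
)
print(item.model_dump())
```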