openai-agents 0.2.8__py3-none-any.whl → 0.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +105 -4
- agents/_debug.py +15 -4
- agents/_run_impl.py +1203 -96
- agents/agent.py +164 -19
- agents/apply_diff.py +329 -0
- agents/editor.py +47 -0
- agents/exceptions.py +35 -0
- agents/extensions/experimental/__init__.py +6 -0
- agents/extensions/experimental/codex/__init__.py +92 -0
- agents/extensions/experimental/codex/codex.py +89 -0
- agents/extensions/experimental/codex/codex_options.py +35 -0
- agents/extensions/experimental/codex/codex_tool.py +1142 -0
- agents/extensions/experimental/codex/events.py +162 -0
- agents/extensions/experimental/codex/exec.py +263 -0
- agents/extensions/experimental/codex/items.py +245 -0
- agents/extensions/experimental/codex/output_schema_file.py +50 -0
- agents/extensions/experimental/codex/payloads.py +31 -0
- agents/extensions/experimental/codex/thread.py +214 -0
- agents/extensions/experimental/codex/thread_options.py +54 -0
- agents/extensions/experimental/codex/turn_options.py +36 -0
- agents/extensions/handoff_filters.py +13 -1
- agents/extensions/memory/__init__.py +120 -0
- agents/extensions/memory/advanced_sqlite_session.py +1285 -0
- agents/extensions/memory/async_sqlite_session.py +239 -0
- agents/extensions/memory/dapr_session.py +423 -0
- agents/extensions/memory/encrypt_session.py +185 -0
- agents/extensions/memory/redis_session.py +261 -0
- agents/extensions/memory/sqlalchemy_session.py +334 -0
- agents/extensions/models/litellm_model.py +449 -36
- agents/extensions/models/litellm_provider.py +3 -1
- agents/function_schema.py +47 -5
- agents/guardrail.py +16 -2
- agents/{handoffs.py → handoffs/__init__.py} +89 -47
- agents/handoffs/history.py +268 -0
- agents/items.py +237 -11
- agents/lifecycle.py +75 -14
- agents/mcp/server.py +280 -37
- agents/mcp/util.py +24 -3
- agents/memory/__init__.py +22 -2
- agents/memory/openai_conversations_session.py +91 -0
- agents/memory/openai_responses_compaction_session.py +249 -0
- agents/memory/session.py +19 -261
- agents/memory/sqlite_session.py +275 -0
- agents/memory/util.py +20 -0
- agents/model_settings.py +14 -3
- agents/models/__init__.py +13 -0
- agents/models/chatcmpl_converter.py +303 -50
- agents/models/chatcmpl_helpers.py +63 -0
- agents/models/chatcmpl_stream_handler.py +290 -68
- agents/models/default_models.py +58 -0
- agents/models/interface.py +4 -0
- agents/models/openai_chatcompletions.py +103 -49
- agents/models/openai_provider.py +10 -4
- agents/models/openai_responses.py +162 -46
- agents/realtime/__init__.py +4 -0
- agents/realtime/_util.py +14 -3
- agents/realtime/agent.py +7 -0
- agents/realtime/audio_formats.py +53 -0
- agents/realtime/config.py +78 -10
- agents/realtime/events.py +18 -0
- agents/realtime/handoffs.py +2 -2
- agents/realtime/items.py +17 -1
- agents/realtime/model.py +13 -0
- agents/realtime/model_events.py +12 -0
- agents/realtime/model_inputs.py +18 -1
- agents/realtime/openai_realtime.py +696 -150
- agents/realtime/session.py +243 -23
- agents/repl.py +7 -3
- agents/result.py +197 -38
- agents/run.py +949 -168
- agents/run_context.py +13 -2
- agents/stream_events.py +1 -0
- agents/strict_schema.py +14 -0
- agents/tool.py +413 -15
- agents/tool_context.py +22 -1
- agents/tool_guardrails.py +279 -0
- agents/tracing/__init__.py +2 -0
- agents/tracing/config.py +9 -0
- agents/tracing/create.py +4 -0
- agents/tracing/processor_interface.py +84 -11
- agents/tracing/processors.py +65 -54
- agents/tracing/provider.py +64 -7
- agents/tracing/spans.py +105 -0
- agents/tracing/traces.py +116 -16
- agents/usage.py +134 -12
- agents/util/_json.py +19 -1
- agents/util/_transforms.py +12 -2
- agents/voice/input.py +5 -4
- agents/voice/models/openai_stt.py +17 -9
- agents/voice/pipeline.py +2 -0
- agents/voice/pipeline_config.py +4 -0
- {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/METADATA +44 -19
- openai_agents-0.6.8.dist-info/RECORD +134 -0
- {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/WHEEL +1 -1
- openai_agents-0.2.8.dist-info/RECORD +0 -103
- {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
 import json
+import os
 import time
 from collections.abc import AsyncIterator
+from copy import copy
 from typing import Any, Literal, cast, overload
 
 from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
@@ -17,10 +19,12 @@ except ImportError as _e:
         "dependency group: `pip install 'openai-agents[litellm]'`."
     ) from _e
 
-from openai import
+from openai import AsyncStream, NotGiven, omit
 from openai.types.chat import (
     ChatCompletionChunk,
+    ChatCompletionMessageCustomToolCall,
     ChatCompletionMessageFunctionToolCall,
+    ChatCompletionMessageParam,
 )
 from openai.types.chat.chat_completion_message import (
     Annotation,
@@ -28,8 +32,8 @@ from openai.types.chat.chat_completion_message import (
     ChatCompletionMessage,
 )
 from openai.types.chat.chat_completion_message_function_tool_call import Function
-from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall
 from openai.types.responses import Response
+from pydantic import BaseModel
 
 from ... import _debug
 from ...agent_output import AgentOutputSchemaBase
@@ -38,23 +42,94 @@ from ...items import ModelResponse, TResponseInputItem, TResponseStreamEvent
 from ...logger import logger
 from ...model_settings import ModelSettings
 from ...models.chatcmpl_converter import Converter
-from ...models.chatcmpl_helpers import HEADERS
+from ...models.chatcmpl_helpers import HEADERS, HEADERS_OVERRIDE
 from ...models.chatcmpl_stream_handler import ChatCmplStreamHandler
 from ...models.fake_id import FAKE_RESPONSES_ID
 from ...models.interface import Model, ModelTracing
+from ...models.openai_responses import Converter as OpenAIResponsesConverter
 from ...tool import Tool
 from ...tracing import generation_span
 from ...tracing.span_data import GenerationSpanData
 from ...tracing.spans import Span
 from ...usage import Usage
+from ...util._json import _to_dump_compatible
+
+
+def _patch_litellm_serializer_warnings() -> None:
+    """Ensure LiteLLM logging uses model_dump(warnings=False) when available."""
+    # Background: LiteLLM emits Pydantic serializer warnings for Message/Choices mismatches.
+    # See: https://github.com/BerriAI/litellm/issues/11759
+    # This patch relies on a private LiteLLM helper; if the name or signature changes,
+    # the wrapper should no-op or fall back to LiteLLM's default behavior. Revisit on upgrade.
+    # Remove this patch once the LiteLLM issue is resolved.
+
+    try:
+        from litellm.litellm_core_utils import litellm_logging as _litellm_logging
+    except Exception:
+        return
+
+    # Guard against double-patching if this module is imported multiple times.
+    if getattr(_litellm_logging, "_openai_agents_patched_serializer_warnings", False):
+        return
+
+    original = getattr(_litellm_logging, "_extract_response_obj_and_hidden_params", None)
+    if original is None:
+        return
+
+    def _wrapped_extract_response_obj_and_hidden_params(*args, **kwargs):
+        # init_response_obj is LiteLLM's raw response container (often a Pydantic BaseModel).
+        # Accept arbitrary args to stay compatible if LiteLLM changes the signature.
+        init_response_obj = args[0] if args else kwargs.get("init_response_obj")
+        if isinstance(init_response_obj, BaseModel):
+            hidden_params = getattr(init_response_obj, "_hidden_params", None)
+            try:
+                response_obj = init_response_obj.model_dump(warnings=False)
+            except TypeError:
+                response_obj = init_response_obj.model_dump()
+            if args:
+                response_obj_out, original_hidden = original(response_obj, *args[1:], **kwargs)
+            else:
+                updated_kwargs = dict(kwargs)
+                updated_kwargs["init_response_obj"] = response_obj
+                response_obj_out, original_hidden = original(**updated_kwargs)
+            return response_obj_out, hidden_params or original_hidden
+
+        return original(*args, **kwargs)
+
+    setattr(  # noqa: B010
+        _litellm_logging,
+        "_extract_response_obj_and_hidden_params",
+        _wrapped_extract_response_obj_and_hidden_params,
+    )
+    setattr(  # noqa: B010
+        _litellm_logging,
+        "_openai_agents_patched_serializer_warnings",
+        True,
+    )
+
+
+# Set OPENAI_AGENTS_ENABLE_LITELLM_SERIALIZER_PATCH=true to opt in.
+_enable_litellm_patch = os.getenv("OPENAI_AGENTS_ENABLE_LITELLM_SERIALIZER_PATCH", "")
+if _enable_litellm_patch.lower() in ("1", "true"):
+    _patch_litellm_serializer_warnings()
 
 
 class InternalChatCompletionMessage(ChatCompletionMessage):
     """
-    An internal subclass to carry reasoning_content without modifying the original model.
-    """
+    An internal subclass to carry reasoning_content and thinking_blocks without modifying the original model.
+    """  # noqa: E501
 
     reasoning_content: str
+    thinking_blocks: list[dict[str, Any]] | None = None
+
+
+class InternalToolCall(ChatCompletionMessageFunctionToolCall):
+    """
+    An internal subclass to carry provider-specific metadata (e.g., Gemini thought signatures)
+    without modifying the original model.
+    """
+
+    extra_content: dict[str, Any] | None = None
 
 
 class LitellmModel(Model):
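
The serializer-warnings patch added above is opt-in and is evaluated at module import time, so the environment variable has to be set before the extension module is imported. A minimal sketch of how a caller might enable it (the model name below is only an illustration, not part of the diff):

    import os

    # Opt in before importing the litellm extension; the flag is read when the
    # module is imported (see the _enable_litellm_patch block above).
    os.environ["OPENAI_AGENTS_ENABLE_LITELLM_SERIALIZER_PATCH"] = "true"

    from agents.extensions.models.litellm_model import LitellmModel  # noqa: E402

    model = LitellmModel(model="anthropic/claude-3-5-sonnet-20241022")  # illustrative model name
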
@@ -82,7 +157,8 @@ class LitellmModel(Model):
         output_schema: AgentOutputSchemaBase | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
-        previous_response_id: str | None,
+        previous_response_id: str | None = None,  # unused
+        conversation_id: str | None = None,  # unused
         prompt: Any | None = None,
     ) -> ModelResponse:
         with generation_span(
@@ -104,18 +180,26 @@ class LitellmModel(Model):
                 prompt=prompt,
             )
 
-
+            message: litellm.types.utils.Message | None = None
+            first_choice: litellm.types.utils.Choices | None = None
+            if response.choices and len(response.choices) > 0:
+                choice = response.choices[0]
+                if isinstance(choice, litellm.types.utils.Choices):
+                    first_choice = choice
+                    message = first_choice.message
 
             if _debug.DONT_LOG_MODEL_DATA:
                 logger.debug("Received model response")
             else:
-
-
-
-
-
-
-
+                if message is not None:
+                    logger.debug(
+                        f"""LLM resp:\n{
+                            json.dumps(message.model_dump(), indent=2, ensure_ascii=False)
+                        }\n"""
+                    )
+                else:
+                    finish_reason = first_choice.finish_reason if first_choice else "-"
+                    logger.debug(f"LLM resp had no message. finish_reason: {finish_reason}")
 
             if hasattr(response, "usage"):
                 response_usage = response.usage
@@ -146,14 +230,26 @@ class LitellmModel(Model):
                 logger.warning("No usage information returned from Litellm")
 
             if tracing.include_data():
-                span_generation.span_data.output =
+                span_generation.span_data.output = (
+                    [message.model_dump()] if message is not None else []
+                )
             span_generation.span_data.usage = {
                 "input_tokens": usage.input_tokens,
                 "output_tokens": usage.output_tokens,
             }
 
-
-
+            # Build provider_data for provider specific fields
+            provider_data: dict[str, Any] = {"model": self.model}
+            if message is not None and hasattr(response, "id"):
+                provider_data["response_id"] = response.id
+
+            items = (
+                Converter.message_to_output_items(
+                    LitellmConverter.convert_message_to_openai(message, model=self.model),
+                    provider_data=provider_data,
+                )
+                if message is not None
+                else []
             )
 
             return ModelResponse(
@@ -171,7 +267,8 @@ class LitellmModel(Model):
         output_schema: AgentOutputSchemaBase | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
-        previous_response_id: str | None,
+        previous_response_id: str | None = None,  # unused
+        conversation_id: str | None = None,  # unused
         prompt: Any | None = None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         with generation_span(
@@ -194,7 +291,9 @@ class LitellmModel(Model):
             )
 
             final_response: Response | None = None
-            async for chunk in ChatCmplStreamHandler.handle_stream(
+            async for chunk in ChatCmplStreamHandler.handle_stream(
+                response, stream, model=self.model
+            ):
                 yield chunk
 
                 if chunk.type == "response.completed":
@@ -252,7 +351,29 @@ class LitellmModel(Model):
         stream: bool = False,
         prompt: Any | None = None,
     ) -> litellm.types.utils.ModelResponse | tuple[Response, AsyncStream[ChatCompletionChunk]]:
-
+        # Preserve reasoning messages for tool calls when reasoning is on
+        # This is needed for models like Claude 4 Sonnet/Opus which support interleaved thinking
+        preserve_thinking_blocks = (
+            model_settings.reasoning is not None and model_settings.reasoning.effort is not None
+        )
+
+        converted_messages = Converter.items_to_messages(
+            input,
+            preserve_thinking_blocks=preserve_thinking_blocks,
+            preserve_tool_output_all_content=True,
+            model=self.model,
+        )
+
+        # Fix message ordering: reorder to ensure tool_use comes before tool_result.
+        # Required for Anthropic and Vertex AI Gemini APIs which reject tool responses without preceding tool calls.  # noqa: E501
+        if any(model.lower() in self.model.lower() for model in ["anthropic", "claude", "gemini"]):
+            converted_messages = self._fix_tool_message_ordering(converted_messages)
+
+        # Convert Google's extra_content to litellm's provider_specific_fields format
+        if "gemini" in self.model.lower():
+            converted_messages = self._convert_gemini_extra_content_to_provider_specific_fields(
+                converted_messages
+            )
 
         if system_instructions:
             converted_messages.insert(
@@ -262,6 +383,8 @@ class LitellmModel(Model):
                     "role": "system",
                 },
             )
+        converted_messages = _to_dump_compatible(converted_messages)
+
         if tracing.include_data():
             span.span_data.input = converted_messages
 
@@ -280,19 +403,61 @@ class LitellmModel(Model):
         for handoff in handoffs:
             converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+        converted_tools = _to_dump_compatible(converted_tools)
+
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
+            messages_json = json.dumps(
+                converted_messages,
+                indent=2,
+                ensure_ascii=False,
+            )
+            tools_json = json.dumps(
+                converted_tools,
+                indent=2,
+                ensure_ascii=False,
+            )
             logger.debug(
                 f"Calling Litellm model: {self.model}\n"
-                f"{
-                f"Tools:\n{
+                f"{messages_json}\n"
+                f"Tools:\n{tools_json}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
             )
 
-        reasoning_effort
+        # Build reasoning_effort - use dict only when summary is present (OpenAI feature)
+        # Otherwise pass string for backward compatibility with all providers
+        reasoning_effort: dict[str, Any] | str | None = None
+        if model_settings.reasoning:
+            if model_settings.reasoning.summary is not None:
+                # Dict format when summary is needed (OpenAI only)
+                reasoning_effort = {
+                    "effort": model_settings.reasoning.effort,
+                    "summary": model_settings.reasoning.summary,
+                }
+            elif model_settings.reasoning.effort is not None:
+                # String format for compatibility with all providers
+                reasoning_effort = model_settings.reasoning.effort
+
+        # Enable developers to pass non-OpenAI compatible reasoning_effort data like "none"
+        # Priority order:
+        # 1. model_settings.reasoning (effort + summary)
+        # 2. model_settings.extra_body["reasoning_effort"]
+        # 3. model_settings.extra_args["reasoning_effort"]
+        if (
+            reasoning_effort is None  # Unset in model_settings
+            and isinstance(model_settings.extra_body, dict)
+            and "reasoning_effort" in model_settings.extra_body
+        ):
+            reasoning_effort = model_settings.extra_body["reasoning_effort"]
+        if (
+            reasoning_effort is None  # Unset in both model_settings and model_settings.extra_body
+            and model_settings.extra_args
+            and "reasoning_effort" in model_settings.extra_args
+        ):
+            reasoning_effort = model_settings.extra_args["reasoning_effort"]
 
         stream_options = None
         if stream and model_settings.include_usage is not None:
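
The reasoning_effort resolution added above follows a fixed priority: model_settings.reasoning first, then extra_body, then extra_args. A short sketch of the three ways a caller might express it, assuming the ModelSettings and Reasoning types the SDK already uses (not part of the diff itself):

    from openai.types.shared import Reasoning

    from agents import ModelSettings

    # 1. Highest priority: model_settings.reasoning (the dict form is only built when a summary is set).
    settings_a = ModelSettings(reasoning=Reasoning(effort="low"))

    # 2. Consulted only if reasoning is unset: extra_body may carry provider-specific values like "none".
    settings_b = ModelSettings(extra_body={"reasoning_effort": "none"})

    # 3. Lowest priority: extra_args; the key is also popped from extra_kwargs to avoid a duplicate keyword.
    settings_c = ModelSettings(extra_args={"reasoning_effort": "minimal"})
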
@@ -300,9 +465,9 @@ class LitellmModel(Model):
 
         extra_kwargs = {}
         if model_settings.extra_query:
-            extra_kwargs["extra_query"] = model_settings.extra_query
+            extra_kwargs["extra_query"] = copy(model_settings.extra_query)
         if model_settings.metadata:
-            extra_kwargs["metadata"] = model_settings.metadata
+            extra_kwargs["metadata"] = copy(model_settings.metadata)
         if model_settings.extra_body and isinstance(model_settings.extra_body, dict):
             extra_kwargs.update(model_settings.extra_body)
 
@@ -310,6 +475,9 @@ class LitellmModel(Model):
         if model_settings.extra_args:
             extra_kwargs.update(model_settings.extra_args)
 
+        # Prevent duplicate reasoning_effort kwargs when it was promoted to a top-level argument.
+        extra_kwargs.pop("reasoning_effort", None)
+
         ret = await litellm.acompletion(
             model=self.model,
             messages=converted_messages,
@@ -326,7 +494,7 @@ class LitellmModel(Model):
             stream_options=stream_options,
             reasoning_effort=reasoning_effort,
             top_logprobs=model_settings.top_logprobs,
-            extra_headers=
+            extra_headers=self._merge_headers(model_settings),
             api_key=self.api_key,
             base_url=self.base_url,
             **extra_kwargs,
@@ -335,15 +503,19 @@ class LitellmModel(Model):
         if isinstance(ret, litellm.types.utils.ModelResponse):
             return ret
 
+        responses_tool_choice = OpenAIResponsesConverter.convert_tool_choice(
+            model_settings.tool_choice
+        )
+        if responses_tool_choice is None or responses_tool_choice is omit:
+            responses_tool_choice = "auto"
+
         response = Response(
             id=FAKE_RESPONSES_ID,
             created_at=time.time(),
             model=self.model,
             object="response",
             output=[],
-            tool_choice=
-            if tool_choice != NOT_GIVEN
-            else "auto",
+            tool_choice=responses_tool_choice,  # type: ignore[arg-type]
             top_p=model_settings.top_p,
             temperature=model_settings.temperature,
             tools=[],
@@ -352,22 +524,212 @@ class LitellmModel(Model):
         )
         return response, ret
 
+    def _convert_gemini_extra_content_to_provider_specific_fields(
+        self, messages: list[ChatCompletionMessageParam]
+    ) -> list[ChatCompletionMessageParam]:
+        """
+        Convert Gemini model's extra_content format to provider_specific_fields format for litellm.
+
+        Transforms tool calls from internal format:
+            extra_content={"google": {"thought_signature": "..."}}
+        To litellm format:
+            provider_specific_fields={"thought_signature": "..."}
+
+        Only processes tool_calls that appear after the last user message.
+        See: https://ai.google.dev/gemini-api/docs/thought-signatures
+        """
+
+        # Find the index of the last user message
+        last_user_index = -1
+        for i in range(len(messages) - 1, -1, -1):
+            if isinstance(messages[i], dict) and messages[i].get("role") == "user":
+                last_user_index = i
+                break
+
+        for i, message in enumerate(messages):
+            if not isinstance(message, dict):
+                continue
+
+            # Only process assistant messages that come after the last user message
+            # If no user message found (last_user_index == -1), process all messages
+            if last_user_index != -1 and i <= last_user_index:
+                continue
+
+            # Check if this is an assistant message with tool calls
+            if message.get("role") == "assistant" and message.get("tool_calls"):
+                tool_calls = message.get("tool_calls", [])
+
+                for tool_call in tool_calls:  # type: ignore[attr-defined]
+                    if not isinstance(tool_call, dict):
+                        continue
+
+                    # Default to skip validator, overridden if valid thought signature exists
+                    tool_call["provider_specific_fields"] = {
+                        "thought_signature": "skip_thought_signature_validator"
+                    }
+
+                    # Override with actual thought signature if extra_content exists
+                    if "extra_content" in tool_call:
+                        extra_content = tool_call.pop("extra_content")
+                        if isinstance(extra_content, dict):
+                            # Extract google-specific fields
+                            google_fields = extra_content.get("google")
+                            if google_fields and isinstance(google_fields, dict):
+                                thought_sig = google_fields.get("thought_signature")
+                                if thought_sig:
+                                    tool_call["provider_specific_fields"] = {
+                                        "thought_signature": thought_sig
+                                    }
+
+        return messages
+
+    def _fix_tool_message_ordering(
+        self, messages: list[ChatCompletionMessageParam]
+    ) -> list[ChatCompletionMessageParam]:
+        """
+        Fix the ordering of tool messages to ensure tool_use messages come before tool_result messages.
+
+        Required for Anthropic and Vertex AI Gemini APIs which require tool calls to immediately
+        precede their corresponding tool responses in conversation history.
+        """  # noqa: E501
+        if not messages:
+            return messages
+
+        # Collect all tool calls and tool results
+        tool_call_messages = {}  # tool_id -> (index, message)
+        tool_result_messages = {}  # tool_id -> (index, message)
+        other_messages = []  # (index, message) for non-tool messages
+
+        for i, message in enumerate(messages):
+            if not isinstance(message, dict):
+                other_messages.append((i, message))
+                continue
+
+            role = message.get("role")
+
+            if role == "assistant" and message.get("tool_calls"):
+                # Extract tool calls from this assistant message
+                tool_calls = message.get("tool_calls", [])
+                if isinstance(tool_calls, list):
+                    for tool_call in tool_calls:
+                        if isinstance(tool_call, dict):
+                            tool_id = tool_call.get("id")
+                            if tool_id:
+                                # Create a separate assistant message for each tool call
+                                single_tool_msg = cast(dict[str, Any], message.copy())
+                                single_tool_msg["tool_calls"] = [tool_call]
+                                tool_call_messages[tool_id] = (
+                                    i,
+                                    cast(ChatCompletionMessageParam, single_tool_msg),
+                                )
+
+            elif role == "tool":
+                tool_call_id = message.get("tool_call_id")
+                if tool_call_id:
+                    tool_result_messages[tool_call_id] = (i, message)
+                else:
+                    other_messages.append((i, message))
+            else:
+                other_messages.append((i, message))
+
+        # First, identify which tool results will be paired to avoid duplicates
+        paired_tool_result_indices = set()
+        for tool_id in tool_call_messages:
+            if tool_id in tool_result_messages:
+                tool_result_idx, _ = tool_result_messages[tool_id]
+                paired_tool_result_indices.add(tool_result_idx)
+
+        # Create the fixed message sequence
+        fixed_messages: list[ChatCompletionMessageParam] = []
+        used_indices = set()
+
+        # Add messages in their original order, but ensure tool_use → tool_result pairing
+        for i, original_message in enumerate(messages):
+            if i in used_indices:
+                continue
+
+            if not isinstance(original_message, dict):
+                fixed_messages.append(original_message)
+                used_indices.add(i)
+                continue
+
+            role = original_message.get("role")
+
+            if role == "assistant" and original_message.get("tool_calls"):
+                # Process each tool call in this assistant message
+                tool_calls = original_message.get("tool_calls", [])
+                if isinstance(tool_calls, list):
+                    for tool_call in tool_calls:
+                        if isinstance(tool_call, dict):
+                            tool_id = tool_call.get("id")
+                            if (
+                                tool_id
+                                and tool_id in tool_call_messages
+                                and tool_id in tool_result_messages
+                            ):
+                                # Add tool_use → tool_result pair
+                                _, tool_call_msg = tool_call_messages[tool_id]
+                                tool_result_idx, tool_result_msg = tool_result_messages[tool_id]
+
+                                fixed_messages.append(tool_call_msg)
+                                fixed_messages.append(tool_result_msg)
+
+                                # Mark both as used
+                                used_indices.add(tool_call_messages[tool_id][0])
+                                used_indices.add(tool_result_idx)
+                            elif tool_id and tool_id in tool_call_messages:
+                                # Tool call without result - add just the tool call
+                                _, tool_call_msg = tool_call_messages[tool_id]
+                                fixed_messages.append(tool_call_msg)
+                                used_indices.add(tool_call_messages[tool_id][0])
+
+                used_indices.add(i)  # Mark original multi-tool message as used
+
+            elif role == "tool":
+                # Only preserve unmatched tool results to avoid duplicates
+                if i not in paired_tool_result_indices:
+                    fixed_messages.append(original_message)
+                    used_indices.add(i)
+
+            else:
+                # Regular message - add it normally
+                fixed_messages.append(original_message)
+                used_indices.add(i)
+
+        return fixed_messages
+
     def _remove_not_given(self, value: Any) -> Any:
-        if isinstance(value, NotGiven):
+        if value is omit or isinstance(value, NotGiven):
             return None
         return value
 
+    def _merge_headers(self, model_settings: ModelSettings):
+        return {**HEADERS, **(model_settings.extra_headers or {}), **(HEADERS_OVERRIDE.get() or {})}
+
 
 class LitellmConverter:
     @classmethod
     def convert_message_to_openai(
-        cls, message: litellm.types.utils.Message
+        cls, message: litellm.types.utils.Message, model: str | None = None
     ) -> ChatCompletionMessage:
+        """
+        Convert a LiteLLM message to OpenAI ChatCompletionMessage format.
+
+        Args:
+            message: The LiteLLM message to convert
+            model: The target model to convert to. Used to handle provider-specific
+                transformations.
+        """
         if message.role != "assistant":
            raise ModelBehaviorError(f"Unsupported role: {message.role}")
 
-        tool_calls:
-            [
+        tool_calls: (
+            list[ChatCompletionMessageFunctionToolCall | ChatCompletionMessageCustomToolCall] | None
+        ) = (
+            [
+                LitellmConverter.convert_tool_call_to_openai(tool, model=model)
+                for tool in message.tool_calls
+            ]
             if message.tool_calls
             else None
         )
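
The _fix_tool_message_ordering helper above enforces the pairing Anthropic and Vertex AI Gemini expect: each assistant tool_call message is emitted immediately before its matching tool result, with one assistant message per call. An illustrative before/after with plain chat-completion dicts (the IDs and tool name are hypothetical):

    # Before: the tool result for call_1 is separated from / precedes its tool call.
    before = [
        {"role": "user", "content": "What's the weather?"},
        {"role": "tool", "tool_call_id": "call_1", "content": '{"temp_c": 21}'},
        {"role": "assistant", "tool_calls": [{"id": "call_1", "type": "function",
            "function": {"name": "get_weather", "arguments": "{}"}}]},
    ]

    # After reordering: the tool_use message comes first and its tool result follows
    # directly; unmatched tool results are left where they were.
    after = [
        {"role": "user", "content": "What's the weather?"},
        {"role": "assistant", "tool_calls": [{"id": "call_1", "type": "function",
            "function": {"name": "get_weather", "arguments": "{}"}}]},
        {"role": "tool", "tool_call_id": "call_1", "content": '{"temp_c": 21}'},
    ]
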
@@ -381,6 +743,26 @@ class LitellmConverter:
         if hasattr(message, "reasoning_content") and message.reasoning_content:
             reasoning_content = message.reasoning_content
 
+        # Extract full thinking blocks including signatures (for Anthropic)
+        thinking_blocks: list[dict[str, Any]] | None = None
+        if hasattr(message, "thinking_blocks") and message.thinking_blocks:
+            # Convert thinking blocks to dict format for compatibility
+            thinking_blocks = []
+            for block in message.thinking_blocks:
+                if isinstance(block, dict):
+                    thinking_blocks.append(cast(dict[str, Any], block))
+                else:
+                    # Convert object to dict by accessing its attributes
+                    block_dict: dict[str, Any] = {}
+                    if hasattr(block, "__dict__"):
+                        block_dict = dict(block.__dict__.items())
+                    elif hasattr(block, "model_dump"):
+                        block_dict = block.model_dump()
+                    else:
+                        # Last resort: convert to string representation
+                        block_dict = {"thinking": str(block)}
+                    thinking_blocks.append(block_dict)
+
         return InternalChatCompletionMessage(
             content=message.content,
             refusal=refusal,
@@ -389,6 +771,7 @@ class LitellmConverter:
             audio=message.get("audio", None),  # litellm deletes audio if not present
             tool_calls=tool_calls,
             reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks,
         )
 
     @classmethod
@@ -416,13 +799,43 @@ class LitellmConverter:
 
     @classmethod
     def convert_tool_call_to_openai(
-        cls, tool_call: litellm.types.utils.ChatCompletionMessageToolCall
+        cls, tool_call: litellm.types.utils.ChatCompletionMessageToolCall, model: str | None = None
     ) -> ChatCompletionMessageFunctionToolCall:
-
-
+        # Clean up litellm's addition of __thought__ suffix to tool_call.id for
+        # Gemini models. See: https://github.com/BerriAI/litellm/pull/16895
+        # This suffix is redundant since we can get thought_signature from
+        # provider_specific_fields, and this hack causes validation errors when
+        # cross-model passing to other models.
+        tool_call_id = tool_call.id
+        if model and "gemini" in model.lower() and "__thought__" in tool_call_id:
+            tool_call_id = tool_call_id.split("__thought__")[0]
+
+        # Convert litellm's tool call format to chat completion message format
+        base_tool_call = ChatCompletionMessageFunctionToolCall(
+            id=tool_call_id,
             type="function",
             function=Function(
                 name=tool_call.function.name or "",
                 arguments=tool_call.function.arguments,
             ),
         )
+
+        # Preserve provider-specific fields if present (e.g., Gemini thought signatures)
+        if hasattr(tool_call, "provider_specific_fields") and tool_call.provider_specific_fields:
+            # Convert to nested extra_content structure
+            extra_content: dict[str, Any] = {}
+            provider_fields = tool_call.provider_specific_fields
+
+            # Check for thought_signature (Gemini specific)
+            if model and "gemini" in model.lower():
+                if "thought_signature" in provider_fields:
+                    extra_content["google"] = {
+                        "thought_signature": provider_fields["thought_signature"]
+                    }
+
+        return InternalToolCall(
+                **base_tool_call.model_dump(),
+                extra_content=extra_content if extra_content else None,
+            )
+
+        return base_tool_call