openai-agents 0.2.8__py3-none-any.whl → 0.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. agents/__init__.py +105 -4
  2. agents/_debug.py +15 -4
  3. agents/_run_impl.py +1203 -96
  4. agents/agent.py +164 -19
  5. agents/apply_diff.py +329 -0
  6. agents/editor.py +47 -0
  7. agents/exceptions.py +35 -0
  8. agents/extensions/experimental/__init__.py +6 -0
  9. agents/extensions/experimental/codex/__init__.py +92 -0
  10. agents/extensions/experimental/codex/codex.py +89 -0
  11. agents/extensions/experimental/codex/codex_options.py +35 -0
  12. agents/extensions/experimental/codex/codex_tool.py +1142 -0
  13. agents/extensions/experimental/codex/events.py +162 -0
  14. agents/extensions/experimental/codex/exec.py +263 -0
  15. agents/extensions/experimental/codex/items.py +245 -0
  16. agents/extensions/experimental/codex/output_schema_file.py +50 -0
  17. agents/extensions/experimental/codex/payloads.py +31 -0
  18. agents/extensions/experimental/codex/thread.py +214 -0
  19. agents/extensions/experimental/codex/thread_options.py +54 -0
  20. agents/extensions/experimental/codex/turn_options.py +36 -0
  21. agents/extensions/handoff_filters.py +13 -1
  22. agents/extensions/memory/__init__.py +120 -0
  23. agents/extensions/memory/advanced_sqlite_session.py +1285 -0
  24. agents/extensions/memory/async_sqlite_session.py +239 -0
  25. agents/extensions/memory/dapr_session.py +423 -0
  26. agents/extensions/memory/encrypt_session.py +185 -0
  27. agents/extensions/memory/redis_session.py +261 -0
  28. agents/extensions/memory/sqlalchemy_session.py +334 -0
  29. agents/extensions/models/litellm_model.py +449 -36
  30. agents/extensions/models/litellm_provider.py +3 -1
  31. agents/function_schema.py +47 -5
  32. agents/guardrail.py +16 -2
  33. agents/{handoffs.py → handoffs/__init__.py} +89 -47
  34. agents/handoffs/history.py +268 -0
  35. agents/items.py +237 -11
  36. agents/lifecycle.py +75 -14
  37. agents/mcp/server.py +280 -37
  38. agents/mcp/util.py +24 -3
  39. agents/memory/__init__.py +22 -2
  40. agents/memory/openai_conversations_session.py +91 -0
  41. agents/memory/openai_responses_compaction_session.py +249 -0
  42. agents/memory/session.py +19 -261
  43. agents/memory/sqlite_session.py +275 -0
  44. agents/memory/util.py +20 -0
  45. agents/model_settings.py +14 -3
  46. agents/models/__init__.py +13 -0
  47. agents/models/chatcmpl_converter.py +303 -50
  48. agents/models/chatcmpl_helpers.py +63 -0
  49. agents/models/chatcmpl_stream_handler.py +290 -68
  50. agents/models/default_models.py +58 -0
  51. agents/models/interface.py +4 -0
  52. agents/models/openai_chatcompletions.py +103 -49
  53. agents/models/openai_provider.py +10 -4
  54. agents/models/openai_responses.py +162 -46
  55. agents/realtime/__init__.py +4 -0
  56. agents/realtime/_util.py +14 -3
  57. agents/realtime/agent.py +7 -0
  58. agents/realtime/audio_formats.py +53 -0
  59. agents/realtime/config.py +78 -10
  60. agents/realtime/events.py +18 -0
  61. agents/realtime/handoffs.py +2 -2
  62. agents/realtime/items.py +17 -1
  63. agents/realtime/model.py +13 -0
  64. agents/realtime/model_events.py +12 -0
  65. agents/realtime/model_inputs.py +18 -1
  66. agents/realtime/openai_realtime.py +696 -150
  67. agents/realtime/session.py +243 -23
  68. agents/repl.py +7 -3
  69. agents/result.py +197 -38
  70. agents/run.py +949 -168
  71. agents/run_context.py +13 -2
  72. agents/stream_events.py +1 -0
  73. agents/strict_schema.py +14 -0
  74. agents/tool.py +413 -15
  75. agents/tool_context.py +22 -1
  76. agents/tool_guardrails.py +279 -0
  77. agents/tracing/__init__.py +2 -0
  78. agents/tracing/config.py +9 -0
  79. agents/tracing/create.py +4 -0
  80. agents/tracing/processor_interface.py +84 -11
  81. agents/tracing/processors.py +65 -54
  82. agents/tracing/provider.py +64 -7
  83. agents/tracing/spans.py +105 -0
  84. agents/tracing/traces.py +116 -16
  85. agents/usage.py +134 -12
  86. agents/util/_json.py +19 -1
  87. agents/util/_transforms.py +12 -2
  88. agents/voice/input.py +5 -4
  89. agents/voice/models/openai_stt.py +17 -9
  90. agents/voice/pipeline.py +2 -0
  91. agents/voice/pipeline_config.py +4 -0
  92. {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/METADATA +44 -19
  93. openai_agents-0.6.8.dist-info/RECORD +134 -0
  94. {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/WHEEL +1 -1
  95. openai_agents-0.2.8.dist-info/RECORD +0 -103
  96. {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
 import json
+import os
 import time
 from collections.abc import AsyncIterator
+from copy import copy
 from typing import Any, Literal, cast, overload
 
 from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
@@ -17,10 +19,12 @@ except ImportError as _e:
         "dependency group: `pip install 'openai-agents[litellm]'`."
     ) from _e
 
-from openai import NOT_GIVEN, AsyncStream, NotGiven
+from openai import AsyncStream, NotGiven, omit
 from openai.types.chat import (
     ChatCompletionChunk,
+    ChatCompletionMessageCustomToolCall,
     ChatCompletionMessageFunctionToolCall,
+    ChatCompletionMessageParam,
 )
 from openai.types.chat.chat_completion_message import (
     Annotation,
@@ -28,8 +32,8 @@ from openai.types.chat.chat_completion_message import (
     ChatCompletionMessage,
 )
 from openai.types.chat.chat_completion_message_function_tool_call import Function
-from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall
 from openai.types.responses import Response
+from pydantic import BaseModel
 
 from ... import _debug
 from ...agent_output import AgentOutputSchemaBase
@@ -38,23 +42,94 @@ from ...items import ModelResponse, TResponseInputItem, TResponseStreamEvent
 from ...logger import logger
 from ...model_settings import ModelSettings
 from ...models.chatcmpl_converter import Converter
-from ...models.chatcmpl_helpers import HEADERS
+from ...models.chatcmpl_helpers import HEADERS, HEADERS_OVERRIDE
 from ...models.chatcmpl_stream_handler import ChatCmplStreamHandler
 from ...models.fake_id import FAKE_RESPONSES_ID
 from ...models.interface import Model, ModelTracing
+from ...models.openai_responses import Converter as OpenAIResponsesConverter
 from ...tool import Tool
 from ...tracing import generation_span
 from ...tracing.span_data import GenerationSpanData
 from ...tracing.spans import Span
 from ...usage import Usage
+from ...util._json import _to_dump_compatible
+
+
+def _patch_litellm_serializer_warnings() -> None:
+    """Ensure LiteLLM logging uses model_dump(warnings=False) when available."""
+    # Background: LiteLLM emits Pydantic serializer warnings for Message/Choices mismatches.
+    # See: https://github.com/BerriAI/litellm/issues/11759
+    # This patch relies on a private LiteLLM helper; if the name or signature changes,
+    # the wrapper should no-op or fall back to LiteLLM's default behavior. Revisit on upgrade.
+    # Remove this patch once the LiteLLM issue is resolved.
+
+    try:
+        from litellm.litellm_core_utils import litellm_logging as _litellm_logging
+    except Exception:
+        return
+
+    # Guard against double-patching if this module is imported multiple times.
+    if getattr(_litellm_logging, "_openai_agents_patched_serializer_warnings", False):
+        return
+
+    original = getattr(_litellm_logging, "_extract_response_obj_and_hidden_params", None)
+    if original is None:
+        return
+
+    def _wrapped_extract_response_obj_and_hidden_params(*args, **kwargs):
+        # init_response_obj is LiteLLM's raw response container (often a Pydantic BaseModel).
+        # Accept arbitrary args to stay compatible if LiteLLM changes the signature.
+        init_response_obj = args[0] if args else kwargs.get("init_response_obj")
+        if isinstance(init_response_obj, BaseModel):
+            hidden_params = getattr(init_response_obj, "_hidden_params", None)
+            try:
+                response_obj = init_response_obj.model_dump(warnings=False)
+            except TypeError:
+                response_obj = init_response_obj.model_dump()
+            if args:
+                response_obj_out, original_hidden = original(response_obj, *args[1:], **kwargs)
+            else:
+                updated_kwargs = dict(kwargs)
+                updated_kwargs["init_response_obj"] = response_obj
+                response_obj_out, original_hidden = original(**updated_kwargs)
+            return response_obj_out, hidden_params or original_hidden
+
+        return original(*args, **kwargs)
+
+    setattr(  # noqa: B010
+        _litellm_logging,
+        "_extract_response_obj_and_hidden_params",
+        _wrapped_extract_response_obj_and_hidden_params,
+    )
+    setattr(  # noqa: B010
+        _litellm_logging,
+        "_openai_agents_patched_serializer_warnings",
+        True,
+    )
+
+
+# Set OPENAI_AGENTS_ENABLE_LITELLM_SERIALIZER_PATCH=true to opt in.
+_enable_litellm_patch = os.getenv("OPENAI_AGENTS_ENABLE_LITELLM_SERIALIZER_PATCH", "")
+if _enable_litellm_patch.lower() in ("1", "true"):
+    _patch_litellm_serializer_warnings()
 
 
 class InternalChatCompletionMessage(ChatCompletionMessage):
     """
-    An internal subclass to carry reasoning_content without modifying the original model.
-    """
+    An internal subclass to carry reasoning_content and thinking_blocks without modifying the original model.
+    """  # noqa: E501
 
     reasoning_content: str
+    thinking_blocks: list[dict[str, Any]] | None = None
+
+
+class InternalToolCall(ChatCompletionMessageFunctionToolCall):
+    """
+    An internal subclass to carry provider-specific metadata (e.g., Gemini thought signatures)
+    without modifying the original model.
+    """
+
+    extra_content: dict[str, Any] | None = None
 
 
 class LitellmModel(Model):
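Note (not part of the diff): the serializer patch above is opt-in and runs at import time. A minimal usage sketch — the model name is illustrative, only the environment variable name comes from the code above:

    # Must be set before agents.extensions.models.litellm_model is imported,
    # because the opt-in check runs at module import time.
    import os
    os.environ["OPENAI_AGENTS_ENABLE_LITELLM_SERIALIZER_PATCH"] = "true"

    from agents.extensions.models.litellm_model import LitellmModel

    model = LitellmModel("anthropic/claude-sonnet-4-20250514", api_key="sk-...")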
@@ -82,7 +157,8 @@ class LitellmModel(Model):
         output_schema: AgentOutputSchemaBase | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
-        previous_response_id: str | None,
+        previous_response_id: str | None = None,  # unused
+        conversation_id: str | None = None,  # unused
         prompt: Any | None = None,
     ) -> ModelResponse:
         with generation_span(
@@ -104,18 +180,26 @@ class LitellmModel(Model):
                 prompt=prompt,
             )
 
-            assert isinstance(response.choices[0], litellm.types.utils.Choices)
+            message: litellm.types.utils.Message | None = None
+            first_choice: litellm.types.utils.Choices | None = None
+            if response.choices and len(response.choices) > 0:
+                choice = response.choices[0]
+                if isinstance(choice, litellm.types.utils.Choices):
+                    first_choice = choice
+                    message = first_choice.message
 
             if _debug.DONT_LOG_MODEL_DATA:
                 logger.debug("Received model response")
             else:
-                logger.debug(
-                    f"""LLM resp:\n{
-                        json.dumps(
-                            response.choices[0].message.model_dump(), indent=2, ensure_ascii=False
-                        )
-                    }\n"""
-                )
+                if message is not None:
+                    logger.debug(
+                        f"""LLM resp:\n{
+                            json.dumps(message.model_dump(), indent=2, ensure_ascii=False)
+                        }\n"""
+                    )
+                else:
+                    finish_reason = first_choice.finish_reason if first_choice else "-"
+                    logger.debug(f"LLM resp had no message. finish_reason: {finish_reason}")
 
             if hasattr(response, "usage"):
                 response_usage = response.usage
@@ -146,14 +230,26 @@ class LitellmModel(Model):
                 logger.warning("No usage information returned from Litellm")
 
             if tracing.include_data():
-                span_generation.span_data.output = [response.choices[0].message.model_dump()]
+                span_generation.span_data.output = (
+                    [message.model_dump()] if message is not None else []
+                )
             span_generation.span_data.usage = {
                 "input_tokens": usage.input_tokens,
                 "output_tokens": usage.output_tokens,
             }
 
-            items = Converter.message_to_output_items(
-                LitellmConverter.convert_message_to_openai(response.choices[0].message)
+            # Build provider_data for provider specific fields
+            provider_data: dict[str, Any] = {"model": self.model}
+            if message is not None and hasattr(response, "id"):
+                provider_data["response_id"] = response.id
+
+            items = (
+                Converter.message_to_output_items(
+                    LitellmConverter.convert_message_to_openai(message, model=self.model),
+                    provider_data=provider_data,
+                )
+                if message is not None
+                else []
             )
 
             return ModelResponse(
@@ -171,7 +267,8 @@ class LitellmModel(Model):
         output_schema: AgentOutputSchemaBase | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
-        previous_response_id: str | None,
+        previous_response_id: str | None = None,  # unused
+        conversation_id: str | None = None,  # unused
         prompt: Any | None = None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         with generation_span(
@@ -194,7 +291,9 @@ class LitellmModel(Model):
             )
 
             final_response: Response | None = None
-            async for chunk in ChatCmplStreamHandler.handle_stream(response, stream):
+            async for chunk in ChatCmplStreamHandler.handle_stream(
+                response, stream, model=self.model
+            ):
                 yield chunk
 
                 if chunk.type == "response.completed":
@@ -252,7 +351,29 @@ class LitellmModel(Model):
         stream: bool = False,
         prompt: Any | None = None,
     ) -> litellm.types.utils.ModelResponse | tuple[Response, AsyncStream[ChatCompletionChunk]]:
-        converted_messages = Converter.items_to_messages(input)
+        # Preserve reasoning messages for tool calls when reasoning is on
+        # This is needed for models like Claude 4 Sonnet/Opus which support interleaved thinking
+        preserve_thinking_blocks = (
+            model_settings.reasoning is not None and model_settings.reasoning.effort is not None
+        )
+
+        converted_messages = Converter.items_to_messages(
+            input,
+            preserve_thinking_blocks=preserve_thinking_blocks,
+            preserve_tool_output_all_content=True,
+            model=self.model,
+        )
+
+        # Fix message ordering: reorder to ensure tool_use comes before tool_result.
+        # Required for Anthropic and Vertex AI Gemini APIs which reject tool responses without preceding tool calls.  # noqa: E501
+        if any(model.lower() in self.model.lower() for model in ["anthropic", "claude", "gemini"]):
+            converted_messages = self._fix_tool_message_ordering(converted_messages)
+
+        # Convert Google's extra_content to litellm's provider_specific_fields format
+        if "gemini" in self.model.lower():
+            converted_messages = self._convert_gemini_extra_content_to_provider_specific_fields(
+                converted_messages
+            )
 
         if system_instructions:
             converted_messages.insert(
@@ -262,6 +383,8 @@ class LitellmModel(Model):
                     "role": "system",
                 },
             )
+        converted_messages = _to_dump_compatible(converted_messages)
+
 
         if tracing.include_data():
             span.span_data.input = converted_messages
@@ -280,19 +403,61 @@ class LitellmModel(Model):
         for handoff in handoffs:
             converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+        converted_tools = _to_dump_compatible(converted_tools)
+
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
+            messages_json = json.dumps(
+                converted_messages,
+                indent=2,
+                ensure_ascii=False,
+            )
+            tools_json = json.dumps(
+                converted_tools,
+                indent=2,
+                ensure_ascii=False,
+            )
             logger.debug(
                 f"Calling Litellm model: {self.model}\n"
-                f"{json.dumps(converted_messages, indent=2, ensure_ascii=False)}\n"
-                f"Tools:\n{json.dumps(converted_tools, indent=2, ensure_ascii=False)}\n"
+                f"{messages_json}\n"
+                f"Tools:\n{tools_json}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
             )
 
-        reasoning_effort = model_settings.reasoning.effort if model_settings.reasoning else None
+        # Build reasoning_effort - use dict only when summary is present (OpenAI feature)
+        # Otherwise pass string for backward compatibility with all providers
+        reasoning_effort: dict[str, Any] | str | None = None
+        if model_settings.reasoning:
+            if model_settings.reasoning.summary is not None:
+                # Dict format when summary is needed (OpenAI only)
+                reasoning_effort = {
+                    "effort": model_settings.reasoning.effort,
+                    "summary": model_settings.reasoning.summary,
+                }
+            elif model_settings.reasoning.effort is not None:
+                # String format for compatibility with all providers
+                reasoning_effort = model_settings.reasoning.effort
+
+        # Enable developers to pass non-OpenAI compatible reasoning_effort data like "none"
+        # Priority order:
+        # 1. model_settings.reasoning (effort + summary)
+        # 2. model_settings.extra_body["reasoning_effort"]
+        # 3. model_settings.extra_args["reasoning_effort"]
+        if (
+            reasoning_effort is None  # Unset in model_settings
+            and isinstance(model_settings.extra_body, dict)
+            and "reasoning_effort" in model_settings.extra_body
+        ):
+            reasoning_effort = model_settings.extra_body["reasoning_effort"]
+        if (
+            reasoning_effort is None  # Unset in both model_settings and model_settings.extra_body
+            and model_settings.extra_args
+            and "reasoning_effort" in model_settings.extra_args
+        ):
+            reasoning_effort = model_settings.extra_args["reasoning_effort"]
 
         stream_options = None
         if stream and model_settings.include_usage is not None:
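Note (not part of the diff): the precedence above can be exercised from ModelSettings. A hedged sketch, assuming the public ModelSettings and openai Reasoning types; values are illustrative:

    from openai.types.shared import Reasoning
    from agents import ModelSettings

    # 1. Highest priority: reasoning.effort, passed to litellm as a plain string.
    settings = ModelSettings(reasoning=Reasoning(effort="low"))

    # 2. If reasoning is unset, extra_body may carry a provider-specific value such as "none".
    settings = ModelSettings(extra_body={"reasoning_effort": "none"})

    # 3. Lowest priority: extra_args; the later extra_kwargs.pop("reasoning_effort", None)
    #    keeps the value from reaching litellm.acompletion() twice.
    settings = ModelSettings(extra_args={"reasoning_effort": "none"})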
@@ -300,9 +465,9 @@ class LitellmModel(Model):
 
         extra_kwargs = {}
         if model_settings.extra_query:
-            extra_kwargs["extra_query"] = model_settings.extra_query
+            extra_kwargs["extra_query"] = copy(model_settings.extra_query)
         if model_settings.metadata:
-            extra_kwargs["metadata"] = model_settings.metadata
+            extra_kwargs["metadata"] = copy(model_settings.metadata)
         if model_settings.extra_body and isinstance(model_settings.extra_body, dict):
             extra_kwargs.update(model_settings.extra_body)
 
@@ -310,6 +475,9 @@ class LitellmModel(Model):
         if model_settings.extra_args:
             extra_kwargs.update(model_settings.extra_args)
 
+        # Prevent duplicate reasoning_effort kwargs when it was promoted to a top-level argument.
+        extra_kwargs.pop("reasoning_effort", None)
+
         ret = await litellm.acompletion(
             model=self.model,
             messages=converted_messages,
@@ -326,7 +494,7 @@ class LitellmModel(Model):
             stream_options=stream_options,
             reasoning_effort=reasoning_effort,
             top_logprobs=model_settings.top_logprobs,
-            extra_headers={**HEADERS, **(model_settings.extra_headers or {})},
+            extra_headers=self._merge_headers(model_settings),
             api_key=self.api_key,
             base_url=self.base_url,
             **extra_kwargs,
@@ -335,15 +503,19 @@ class LitellmModel(Model):
         if isinstance(ret, litellm.types.utils.ModelResponse):
             return ret
 
+        responses_tool_choice = OpenAIResponsesConverter.convert_tool_choice(
+            model_settings.tool_choice
+        )
+        if responses_tool_choice is None or responses_tool_choice is omit:
+            responses_tool_choice = "auto"
+
         response = Response(
             id=FAKE_RESPONSES_ID,
             created_at=time.time(),
             model=self.model,
             object="response",
             output=[],
-            tool_choice=cast(Literal["auto", "required", "none"], tool_choice)
-            if tool_choice != NOT_GIVEN
-            else "auto",
+            tool_choice=responses_tool_choice,  # type: ignore[arg-type]
             top_p=model_settings.top_p,
             temperature=model_settings.temperature,
             tools=[],
@@ -352,22 +524,212 @@ class LitellmModel(Model):
         )
         return response, ret
 
+    def _convert_gemini_extra_content_to_provider_specific_fields(
+        self, messages: list[ChatCompletionMessageParam]
+    ) -> list[ChatCompletionMessageParam]:
+        """
+        Convert Gemini model's extra_content format to provider_specific_fields format for litellm.
+
+        Transforms tool calls from internal format:
+            extra_content={"google": {"thought_signature": "..."}}
+        To litellm format:
+            provider_specific_fields={"thought_signature": "..."}
+
+        Only processes tool_calls that appear after the last user message.
+        See: https://ai.google.dev/gemini-api/docs/thought-signatures
+        """
+
+        # Find the index of the last user message
+        last_user_index = -1
+        for i in range(len(messages) - 1, -1, -1):
+            if isinstance(messages[i], dict) and messages[i].get("role") == "user":
+                last_user_index = i
+                break
+
+        for i, message in enumerate(messages):
+            if not isinstance(message, dict):
+                continue
+
+            # Only process assistant messages that come after the last user message
+            # If no user message found (last_user_index == -1), process all messages
+            if last_user_index != -1 and i <= last_user_index:
+                continue
+
+            # Check if this is an assistant message with tool calls
+            if message.get("role") == "assistant" and message.get("tool_calls"):
+                tool_calls = message.get("tool_calls", [])
+
+                for tool_call in tool_calls:  # type: ignore[attr-defined]
+                    if not isinstance(tool_call, dict):
+                        continue
+
+                    # Default to skip validator, overridden if valid thought signature exists
+                    tool_call["provider_specific_fields"] = {
+                        "thought_signature": "skip_thought_signature_validator"
+                    }
+
+                    # Override with actual thought signature if extra_content exists
+                    if "extra_content" in tool_call:
+                        extra_content = tool_call.pop("extra_content")
+                        if isinstance(extra_content, dict):
+                            # Extract google-specific fields
+                            google_fields = extra_content.get("google")
+                            if google_fields and isinstance(google_fields, dict):
+                                thought_sig = google_fields.get("thought_signature")
+                                if thought_sig:
+                                    tool_call["provider_specific_fields"] = {
+                                        "thought_signature": thought_sig
+                                    }
+
+        return messages
+
+    def _fix_tool_message_ordering(
+        self, messages: list[ChatCompletionMessageParam]
+    ) -> list[ChatCompletionMessageParam]:
+        """
+        Fix the ordering of tool messages to ensure tool_use messages come before tool_result messages.
+
+        Required for Anthropic and Vertex AI Gemini APIs which require tool calls to immediately
+        precede their corresponding tool responses in conversation history.
+        """  # noqa: E501
+        if not messages:
+            return messages
+
+        # Collect all tool calls and tool results
+        tool_call_messages = {}  # tool_id -> (index, message)
+        tool_result_messages = {}  # tool_id -> (index, message)
+        other_messages = []  # (index, message) for non-tool messages
+
+        for i, message in enumerate(messages):
+            if not isinstance(message, dict):
+                other_messages.append((i, message))
+                continue
+
+            role = message.get("role")
+
+            if role == "assistant" and message.get("tool_calls"):
+                # Extract tool calls from this assistant message
+                tool_calls = message.get("tool_calls", [])
+                if isinstance(tool_calls, list):
+                    for tool_call in tool_calls:
+                        if isinstance(tool_call, dict):
+                            tool_id = tool_call.get("id")
+                            if tool_id:
+                                # Create a separate assistant message for each tool call
+                                single_tool_msg = cast(dict[str, Any], message.copy())
+                                single_tool_msg["tool_calls"] = [tool_call]
+                                tool_call_messages[tool_id] = (
+                                    i,
+                                    cast(ChatCompletionMessageParam, single_tool_msg),
+                                )
+
+            elif role == "tool":
+                tool_call_id = message.get("tool_call_id")
+                if tool_call_id:
+                    tool_result_messages[tool_call_id] = (i, message)
+                else:
+                    other_messages.append((i, message))
+            else:
+                other_messages.append((i, message))
+
+        # First, identify which tool results will be paired to avoid duplicates
+        paired_tool_result_indices = set()
+        for tool_id in tool_call_messages:
+            if tool_id in tool_result_messages:
+                tool_result_idx, _ = tool_result_messages[tool_id]
+                paired_tool_result_indices.add(tool_result_idx)
+
+        # Create the fixed message sequence
+        fixed_messages: list[ChatCompletionMessageParam] = []
+        used_indices = set()
+
+        # Add messages in their original order, but ensure tool_use → tool_result pairing
+        for i, original_message in enumerate(messages):
+            if i in used_indices:
+                continue
+
+            if not isinstance(original_message, dict):
+                fixed_messages.append(original_message)
+                used_indices.add(i)
+                continue
+
+            role = original_message.get("role")
+
+            if role == "assistant" and original_message.get("tool_calls"):
+                # Process each tool call in this assistant message
+                tool_calls = original_message.get("tool_calls", [])
+                if isinstance(tool_calls, list):
+                    for tool_call in tool_calls:
+                        if isinstance(tool_call, dict):
+                            tool_id = tool_call.get("id")
+                            if (
+                                tool_id
+                                and tool_id in tool_call_messages
+                                and tool_id in tool_result_messages
+                            ):
+                                # Add tool_use → tool_result pair
+                                _, tool_call_msg = tool_call_messages[tool_id]
+                                tool_result_idx, tool_result_msg = tool_result_messages[tool_id]
+
+                                fixed_messages.append(tool_call_msg)
+                                fixed_messages.append(tool_result_msg)
+
+                                # Mark both as used
+                                used_indices.add(tool_call_messages[tool_id][0])
+                                used_indices.add(tool_result_idx)
+                            elif tool_id and tool_id in tool_call_messages:
+                                # Tool call without result - add just the tool call
+                                _, tool_call_msg = tool_call_messages[tool_id]
+                                fixed_messages.append(tool_call_msg)
+                                used_indices.add(tool_call_messages[tool_id][0])
+
+                used_indices.add(i)  # Mark original multi-tool message as used
+
+            elif role == "tool":
+                # Only preserve unmatched tool results to avoid duplicates
+                if i not in paired_tool_result_indices:
+                    fixed_messages.append(original_message)
+                    used_indices.add(i)
+
+            else:
+                # Regular message - add it normally
+                fixed_messages.append(original_message)
+                used_indices.add(i)
+
+        return fixed_messages
+
     def _remove_not_given(self, value: Any) -> Any:
-        if isinstance(value, NotGiven):
+        if value is omit or isinstance(value, NotGiven):
             return None
         return value
 
+    def _merge_headers(self, model_settings: ModelSettings):
+        return {**HEADERS, **(model_settings.extra_headers or {}), **(HEADERS_OVERRIDE.get() or {})}
+
 
 
 class LitellmConverter:
     @classmethod
     def convert_message_to_openai(
-        cls, message: litellm.types.utils.Message
+        cls, message: litellm.types.utils.Message, model: str | None = None
    ) -> ChatCompletionMessage:
+        """
+        Convert a LiteLLM message to OpenAI ChatCompletionMessage format.
+
+        Args:
+            message: The LiteLLM message to convert
+            model: The target model to convert to. Used to handle provider-specific
+                transformations.
+        """
         if message.role != "assistant":
             raise ModelBehaviorError(f"Unsupported role: {message.role}")
 
-        tool_calls: list[ChatCompletionMessageToolCall] | None = (
-            [LitellmConverter.convert_tool_call_to_openai(tool) for tool in message.tool_calls]
+        tool_calls: (
+            list[ChatCompletionMessageFunctionToolCall | ChatCompletionMessageCustomToolCall] | None
+        ) = (
+            [
+                LitellmConverter.convert_tool_call_to_openai(tool, model=model)
+                for tool in message.tool_calls
+            ]
             if message.tool_calls
             else None
         )
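Note (not part of the diff): to make _fix_tool_message_ordering concrete, a hand-written illustration; the message shapes are assumed, not taken from the package's tests:

    # Before: one assistant message carrying two tool calls, with the results out of order.
    before = [
        {"role": "user", "content": "Weather and time, please."},
        {"role": "assistant", "tool_calls": [
            {"id": "call_1", "type": "function", "function": {"name": "weather", "arguments": "{}"}},
            {"id": "call_2", "type": "function", "function": {"name": "time", "arguments": "{}"}},
        ]},
        {"role": "tool", "tool_call_id": "call_2", "content": "12:00"},
        {"role": "tool", "tool_call_id": "call_1", "content": "Sunny"},
    ]
    # After: the assistant message is split so each tool call immediately precedes its result:
    #   user -> assistant(call_1) -> tool(call_1) -> assistant(call_2) -> tool(call_2)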
@@ -381,6 +743,26 @@ class LitellmConverter:
         if hasattr(message, "reasoning_content") and message.reasoning_content:
             reasoning_content = message.reasoning_content
 
+        # Extract full thinking blocks including signatures (for Anthropic)
+        thinking_blocks: list[dict[str, Any]] | None = None
+        if hasattr(message, "thinking_blocks") and message.thinking_blocks:
+            # Convert thinking blocks to dict format for compatibility
+            thinking_blocks = []
+            for block in message.thinking_blocks:
+                if isinstance(block, dict):
+                    thinking_blocks.append(cast(dict[str, Any], block))
+                else:
+                    # Convert object to dict by accessing its attributes
+                    block_dict: dict[str, Any] = {}
+                    if hasattr(block, "__dict__"):
+                        block_dict = dict(block.__dict__.items())
+                    elif hasattr(block, "model_dump"):
+                        block_dict = block.model_dump()
+                    else:
+                        # Last resort: convert to string representation
+                        block_dict = {"thinking": str(block)}
+                    thinking_blocks.append(block_dict)
+
         return InternalChatCompletionMessage(
             content=message.content,
             refusal=refusal,
@@ -389,6 +771,7 @@ class LitellmConverter:
             audio=message.get("audio", None),  # litellm deletes audio if not present
             tool_calls=tool_calls,
             reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks,
         )
 
     @classmethod
@@ -416,13 +799,43 @@ class LitellmConverter:
 
     @classmethod
     def convert_tool_call_to_openai(
-        cls, tool_call: litellm.types.utils.ChatCompletionMessageToolCall
+        cls, tool_call: litellm.types.utils.ChatCompletionMessageToolCall, model: str | None = None
     ) -> ChatCompletionMessageFunctionToolCall:
-        return ChatCompletionMessageFunctionToolCall(
-            id=tool_call.id,
+        # Clean up litellm's addition of __thought__ suffix to tool_call.id for
+        # Gemini models. See: https://github.com/BerriAI/litellm/pull/16895
+        # This suffix is redundant since we can get thought_signature from
+        # provider_specific_fields, and this hack causes validation errors when
+        # cross-model passing to other models.
+        tool_call_id = tool_call.id
+        if model and "gemini" in model.lower() and "__thought__" in tool_call_id:
+            tool_call_id = tool_call_id.split("__thought__")[0]
+
+        # Convert litellm's tool call format to chat completion message format
+        base_tool_call = ChatCompletionMessageFunctionToolCall(
+            id=tool_call_id,
             type="function",
             function=Function(
                 name=tool_call.function.name or "",
                 arguments=tool_call.function.arguments,
             ),
         )
+
+        # Preserve provider-specific fields if present (e.g., Gemini thought signatures)
+        if hasattr(tool_call, "provider_specific_fields") and tool_call.provider_specific_fields:
+            # Convert to nested extra_content structure
+            extra_content: dict[str, Any] = {}
+            provider_fields = tool_call.provider_specific_fields
+
+            # Check for thought_signature (Gemini specific)
+            if model and "gemini" in model.lower():
+                if "thought_signature" in provider_fields:
+                    extra_content["google"] = {
+                        "thought_signature": provider_fields["thought_signature"]
+                    }
+
+            return InternalToolCall(
+                **base_tool_call.model_dump(),
+                extra_content=extra_content if extra_content else None,
+            )
+
+        return base_tool_call
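Note (not part of the diff): a rough sketch of the Gemini round trip implied by the hunks above; the id and signature values are invented for illustration:

    # Response side: litellm may return a Gemini tool call whose id carries a "__thought__"
    # suffix and whose signature sits in provider_specific_fields. convert_tool_call_to_openai()
    # strips the suffix and stashes the signature on InternalToolCall.extra_content.
    raw_id = "call_abc__thought__xyz"
    cleaned_id = raw_id.split("__thought__")[0]  # "call_abc"
    extra_content = {"google": {"thought_signature": "sig-123"}}

    # Request side: before the next litellm call,
    # _convert_gemini_extra_content_to_provider_specific_fields() moves it back:
    #   tool_call["provider_specific_fields"] = {"thought_signature": "sig-123"}
    # and falls back to "skip_thought_signature_validator" when no signature is present.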