pydantic-ai-slim 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (116)
  1. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/PKG-INFO +4 -4
  2. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_agent_graph.py +66 -56
  3. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_parts_manager.py +5 -4
  4. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_tool_manager.py +50 -29
  5. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/agent/__init__.py +62 -75
  6. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/__init__.py +28 -0
  7. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/anthropic.py +20 -20
  8. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/bedrock.py +1 -1
  9. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/fallback.py +7 -2
  10. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/google.py +66 -6
  11. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/groq.py +1 -0
  12. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/huggingface.py +9 -2
  13. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/openai.py +31 -5
  14. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/__init__.py +10 -1
  15. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/deepseek.py +1 -1
  16. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/moonshotai.py +1 -1
  17. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/qwen.py +4 -1
  18. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/__init__.py +4 -0
  19. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/huggingface.py +27 -0
  20. pydantic_ai_slim-0.7.2/pydantic_ai/providers/ollama.py +105 -0
  21. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/openrouter.py +2 -0
  22. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/result.py +1 -1
  23. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/tools.py +9 -9
  24. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/usage.py +17 -1
  25. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pyproject.toml +1 -1
  26. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/.gitignore +0 -0
  27. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/LICENSE +0 -0
  28. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/README.md +0 -0
  29. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/__init__.py +0 -0
  30. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/__main__.py +0 -0
  31. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_a2a.py +0 -0
  32. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_cli.py +0 -0
  33. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_function_schema.py +0 -0
  34. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_griffe.py +0 -0
  35. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_mcp.py +0 -0
  36. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_output.py +0 -0
  37. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_run_context.py +0 -0
  38. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_system_prompt.py +0 -0
  39. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_thinking_part.py +0 -0
  40. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/_utils.py +0 -0
  41. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/ag_ui.py +0 -0
  42. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/agent/abstract.py +0 -0
  43. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/agent/wrapper.py +0 -0
  44. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/builtin_tools.py +0 -0
  45. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/common_tools/__init__.py +0 -0
  46. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/common_tools/duckduckgo.py +0 -0
  47. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/common_tools/tavily.py +0 -0
  48. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/direct.py +0 -0
  49. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/__init__.py +0 -0
  50. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/__init__.py +0 -0
  51. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/_agent.py +0 -0
  52. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/_function_toolset.py +0 -0
  53. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/_logfire.py +0 -0
  54. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/_mcp_server.py +0 -0
  55. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/_model.py +0 -0
  56. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/_run_context.py +0 -0
  57. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/durable_exec/temporal/_toolset.py +0 -0
  58. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/exceptions.py +0 -0
  59. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/ext/__init__.py +0 -0
  60. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/ext/aci.py +0 -0
  61. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/ext/langchain.py +0 -0
  62. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/format_prompt.py +0 -0
  63. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/mcp.py +0 -0
  64. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/messages.py +0 -0
  65. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/cohere.py +0 -0
  66. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/function.py +0 -0
  67. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/gemini.py +0 -0
  68. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/instrumented.py +0 -0
  69. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/mcp_sampling.py +0 -0
  70. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/mistral.py +0 -0
  71. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/test.py +0 -0
  72. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/models/wrapper.py +0 -0
  73. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/output.py +0 -0
  74. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/_json_schema.py +0 -0
  75. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/amazon.py +0 -0
  76. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/anthropic.py +0 -0
  77. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/cohere.py +0 -0
  78. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/google.py +0 -0
  79. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/grok.py +0 -0
  80. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/groq.py +0 -0
  81. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/meta.py +0 -0
  82. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/mistral.py +0 -0
  83. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/profiles/openai.py +0 -0
  84. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/anthropic.py +0 -0
  85. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/azure.py +0 -0
  86. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/bedrock.py +0 -0
  87. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/cohere.py +0 -0
  88. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/deepseek.py +0 -0
  89. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/fireworks.py +0 -0
  90. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/github.py +0 -0
  91. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/google.py +0 -0
  92. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/google_gla.py +0 -0
  93. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/google_vertex.py +0 -0
  94. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/grok.py +0 -0
  95. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/groq.py +0 -0
  96. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/heroku.py +0 -0
  97. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/mistral.py +0 -0
  98. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/moonshotai.py +0 -0
  99. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/openai.py +0 -0
  100. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/together.py +0 -0
  101. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/providers/vercel.py +0 -0
  102. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/py.typed +0 -0
  103. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/retries.py +0 -0
  104. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/run.py +0 -0
  105. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/settings.py +0 -0
  106. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/__init__.py +0 -0
  107. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/_dynamic.py +0 -0
  108. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/abstract.py +0 -0
  109. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/combined.py +0 -0
  110. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/deferred.py +0 -0
  111. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/filtered.py +0 -0
  112. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/function.py +0 -0
  113. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/prefixed.py +0 -0
  114. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/prepared.py +0 -0
  115. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/renamed.py +0 -0
  116. {pydantic_ai_slim-0.7.0 → pydantic_ai_slim-0.7.2}/pydantic_ai/toolsets/wrapper.py +0 -0
PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai-slim
-Version: 0.7.0
+Version: 0.7.2
 Summary: Agent Framework / shim to use Pydantic with LLMs, slim package
 Author-email: Samuel Colvin <samuel@pydantic.dev>, Marcelo Trylesinski <marcelotryle@gmail.com>, David Montague <david@pydantic.dev>, Alex Hall <alex@pydantic.dev>, Douwe Maan <douwe@pydantic.dev>
 License-Expression: MIT
@@ -30,7 +30,7 @@ Requires-Dist: exceptiongroup; python_version < '3.11'
 Requires-Dist: griffe>=1.3.2
 Requires-Dist: httpx>=0.27
 Requires-Dist: opentelemetry-api>=1.28.0
-Requires-Dist: pydantic-graph==0.7.0
+Requires-Dist: pydantic-graph==0.7.2
 Requires-Dist: pydantic>=2.10
 Requires-Dist: typing-inspection>=0.4.0
 Provides-Extra: a2a
@@ -51,7 +51,7 @@ Requires-Dist: cohere>=5.16.0; (platform_system != 'Emscripten') and extra == 'c
 Provides-Extra: duckduckgo
 Requires-Dist: ddgs>=9.0.0; extra == 'duckduckgo'
 Provides-Extra: evals
-Requires-Dist: pydantic-evals==0.7.0; extra == 'evals'
+Requires-Dist: pydantic-evals==0.7.2; extra == 'evals'
 Provides-Extra: google
 Requires-Dist: google-genai>=1.28.0; extra == 'google'
 Provides-Extra: groq
@@ -65,7 +65,7 @@ Requires-Dist: mcp>=1.10.0; (python_version >= '3.10') and extra == 'mcp'
 Provides-Extra: mistral
 Requires-Dist: mistralai>=1.9.2; extra == 'mistral'
 Provides-Extra: openai
-Requires-Dist: openai>=1.92.0; extra == 'openai'
+Requires-Dist: openai>=1.99.9; extra == 'openai'
 Provides-Extra: retries
 Requires-Dist: tenacity>=8.2.3; extra == 'retries'
 Provides-Extra: tavily
```
pydantic_ai/_agent_graph.py

```diff
@@ -23,7 +23,7 @@ from pydantic_graph.nodes import End, NodeRunEndT
 from . import _output, _system_prompt, exceptions, messages as _messages, models, result, usage as _usage
 from .exceptions import ToolRetryError
 from .output import OutputDataT, OutputSpec
-from .settings import ModelSettings, merge_model_settings
+from .settings import ModelSettings
 from .tools import RunContext, ToolDefinition, ToolKind
 
 if TYPE_CHECKING:
@@ -158,28 +158,7 @@ class UserPromptNode(AgentNode[DepsT, NodeRunEndT]):
 
     async def run(
         self, ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]]
-    ) -> ModelRequestNode[DepsT, NodeRunEndT]:
-        return ModelRequestNode[DepsT, NodeRunEndT](request=await self._get_first_message(ctx))
-
-    async def _get_first_message(
-        self, ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]]
-    ) -> _messages.ModelRequest:
-        run_context = build_run_context(ctx)
-        history, next_message = await self._prepare_messages(
-            self.user_prompt, ctx.state.message_history, ctx.deps.get_instructions, run_context
-        )
-        ctx.state.message_history = history
-        run_context.messages = history
-
-        return next_message
-
-    async def _prepare_messages(
-        self,
-        user_prompt: str | Sequence[_messages.UserContent] | None,
-        message_history: list[_messages.ModelMessage] | None,
-        get_instructions: Callable[[RunContext[DepsT]], Awaitable[str | None]],
-        run_context: RunContext[DepsT],
-    ) -> tuple[list[_messages.ModelMessage], _messages.ModelRequest]:
+    ) -> Union[ModelRequestNode[DepsT, NodeRunEndT], CallToolsNode[DepsT, NodeRunEndT]]:  # noqa UP007
         try:
             ctx_messages = get_captured_run_messages()
         except LookupError:
@@ -191,29 +170,48 @@ class UserPromptNode(AgentNode[DepsT, NodeRunEndT]):
             messages = ctx_messages.messages
             ctx_messages.used = True
 
+        # Add message history to the `capture_run_messages` list, which will be empty at this point
+        messages.extend(ctx.state.message_history)
+        # Use the `capture_run_messages` list as the message history so that new messages are added to it
+        ctx.state.message_history = messages
+
+        run_context = build_run_context(ctx)
+
         parts: list[_messages.ModelRequestPart] = []
-        instructions = await get_instructions(run_context)
-        if message_history:
-            # Shallow copy messages
-            messages.extend(message_history)
+        if messages:
             # Reevaluate any dynamic system prompt parts
             await self._reevaluate_dynamic_prompts(messages, run_context)
         else:
             parts.extend(await self._sys_parts(run_context))
 
-        if user_prompt is not None:
-            parts.append(_messages.UserPromptPart(user_prompt))
-        elif (
-            len(parts) == 0
-            and message_history
-            and (last_message := message_history[-1])
-            and isinstance(last_message, _messages.ModelRequest)
-        ):
-            # Drop last message that came from history and reuse its parts
-            messages.pop()
-            parts.extend(last_message.parts)
+        if messages and (last_message := messages[-1]):
+            if isinstance(last_message, _messages.ModelRequest) and self.user_prompt is None:
+                # Drop last message from history and reuse its parts
+                messages.pop()
+                parts.extend(last_message.parts)
+            elif isinstance(last_message, _messages.ModelResponse):
+                if self.user_prompt is None:
+                    # `CallToolsNode` requires the tool manager to be prepared for the run step
+                    # This will raise errors for any tool name conflicts
+                    ctx.deps.tool_manager = await ctx.deps.tool_manager.for_run_step(run_context)
+
+                    # Skip ModelRequestNode and go directly to CallToolsNode
+                    return CallToolsNode[DepsT, NodeRunEndT](model_response=last_message)
+                elif any(isinstance(part, _messages.ToolCallPart) for part in last_message.parts):
+                    raise exceptions.UserError(
+                        'Cannot provide a new user prompt when the message history ends with '
+                        'a model response containing unprocessed tool calls. Either process the '
+                        'tool calls first (by calling `iter` with `user_prompt=None`) or append a '
+                        '`ModelRequest` with `ToolResultPart`s.'
+                    )
 
-        return messages, _messages.ModelRequest(parts, instructions=instructions)
+        if self.user_prompt is not None:
+            parts.append(_messages.UserPromptPart(self.user_prompt))
+
+        instructions = await ctx.deps.get_instructions(run_context)
+        next_message = _messages.ModelRequest(parts, instructions=instructions)
+
+        return ModelRequestNode[DepsT, NodeRunEndT](request=next_message)
 
     async def _reevaluate_dynamic_prompts(
         self, messages: list[_messages.ModelMessage], run_context: RunContext[DepsT]
```
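Net effect of the rewrite above: the supplied history is folded into the `capture_run_messages` list, and a run whose history ends in a `ModelResponse` can now resume without a new prompt, jumping straight to `CallToolsNode` (while a new prompt over unprocessed tool calls raises `UserError`). A minimal sketch of that resume path, using the built-in `'test'` model so it runs offline; the history shown is illustrative:

```python
from pydantic_ai import Agent
from pydantic_ai.messages import ModelRequest, ModelResponse, TextPart, UserPromptPart

agent = Agent('test')  # TestModel: no API calls

history = [
    ModelRequest(parts=[UserPromptPart('What is 2 + 2?')]),
    ModelResponse(parts=[TextPart('4')]),
]

# With user_prompt=None, the run resumes from the trailing ModelResponse
# (UserPromptNode returns CallToolsNode directly instead of a new ModelRequestNode).
result = agent.run_sync(None, message_history=history)
print(result.output)
```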
pydantic_ai/_agent_graph.py (continued)

```diff
@@ -250,9 +248,6 @@ async def _prepare_request_parameters(
     ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]],
 ) -> models.ModelRequestParameters:
     """Build tools and create an agent model."""
-    run_context = build_run_context(ctx)
-    ctx.deps.tool_manager = await ctx.deps.tool_manager.for_run_step(run_context)
-
     output_schema = ctx.deps.output_schema
     output_object = None
     if isinstance(output_schema, _output.NativeOutputSchema):
@@ -351,21 +346,28 @@ class ModelRequestNode(AgentNode[DepsT, NodeRunEndT]):
     ) -> tuple[ModelSettings | None, models.ModelRequestParameters, list[_messages.ModelMessage], RunContext[DepsT]]:
         ctx.state.message_history.append(self.request)
 
-        # Check usage
-        if ctx.deps.usage_limits:  # pragma: no branch
-            ctx.deps.usage_limits.check_before_request(ctx.state.usage)
-
-        # Increment run_step
         ctx.state.run_step += 1
 
         run_context = build_run_context(ctx)
 
-        model_settings = merge_model_settings(ctx.deps.model_settings, None)
+        # This will raise errors for any tool name conflicts
+        ctx.deps.tool_manager = await ctx.deps.tool_manager.for_run_step(run_context)
+
+        message_history = await _process_message_history(ctx.state, ctx.deps.history_processors, run_context)
 
         model_request_parameters = await _prepare_request_parameters(ctx)
         model_request_parameters = ctx.deps.model.customize_request_parameters(model_request_parameters)
 
-        message_history = await _process_message_history(ctx.state, ctx.deps.history_processors, run_context)
+        model_settings = ctx.deps.model_settings
+        usage = ctx.state.usage
+        if ctx.deps.usage_limits.count_tokens_before_request:
+            # Copy to avoid modifying the original usage object with the counted usage
+            usage = dataclasses.replace(usage)
+
+            counted_usage = await ctx.deps.model.count_tokens(message_history, model_settings, model_request_parameters)
+            usage.incr(counted_usage)
+
+        ctx.deps.usage_limits.check_before_request(usage)
 
         return model_settings, model_request_parameters, message_history, run_context
 
```
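The request-prep step now supports an optional pre-flight token count before enforcing limits. From the caller's side this is driven by the new `UsageLimits.count_tokens_before_request` flag (see the `usage.py` changes in this release); a hedged sketch, assuming a model whose backend implements `count_tokens` (per this diff, the Google model gained support in `google.py`):

```python
from pydantic_ai import Agent
from pydantic_ai.usage import UsageLimits

agent = Agent('google-gla:gemini-2.0-flash')  # assumed: needs a Gemini API key at run time

result = agent.run_sync(
    'Summarize the plot of Hamlet in two sentences.',
    usage_limits=UsageLimits(
        request_tokens_limit=500,          # existing limit field
        count_tokens_before_request=True,  # new: count tokens first, enforce before sending
    ),
)
```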
pydantic_ai/_agent_graph.py (continued)

```diff
@@ -425,9 +427,11 @@ class CallToolsNode(AgentNode[DepsT, NodeRunEndT]):
         if self._events_iterator is None:
             # Ensure that the stream is only run once
 
-            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
+            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:  # noqa: C901
                 texts: list[str] = []
                 tool_calls: list[_messages.ToolCallPart] = []
+                thinking_parts: list[_messages.ThinkingPart] = []
+
                 for part in self.model_response.parts:
                     if isinstance(part, _messages.TextPart):
                         # ignore empty content for text parts, see #437
@@ -440,11 +444,7 @@ class CallToolsNode(AgentNode[DepsT, NodeRunEndT]):
                     elif isinstance(part, _messages.BuiltinToolReturnPart):
                         yield _messages.BuiltinToolResultEvent(part)
                     elif isinstance(part, _messages.ThinkingPart):
-                        # We don't need to do anything with thinking parts in this tool-calling node.
-                        # We need to handle text parts in case there are no tool calls and/or the desired output comes
-                        # from the text, but thinking parts should not directly influence the execution of tools or
-                        # determination of the next node of graph execution here.
-                        pass
+                        thinking_parts.append(part)
                     else:
                         assert_never(part)
 
@@ -458,8 +458,18 @@ class CallToolsNode(AgentNode[DepsT, NodeRunEndT]):
                 elif texts:
                     # No events are emitted during the handling of text responses, so we don't need to yield anything
                     self._next_node = await self._handle_text_response(ctx, texts)
+                elif thinking_parts:
+                    # handle thinking-only responses (responses that contain only ThinkingPart instances)
+                    # this can happen with models that support thinking mode when they don't provide
+                    # actionable output alongside their thinking content.
+                    self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
+                        _messages.ModelRequest(
+                            parts=[_messages.RetryPromptPart('Responses without text or tool calls are not permitted.')]
+                        )
+                    )
                 else:
-                    # we've got an empty response, this sometimes happens with anthropic (and perhaps other models)
+                    # we got an empty response with no tool calls, text, or thinking
+                    # this sometimes happens with anthropic (and perhaps other models)
                     # when the model has already returned text along side tool calls
                     # in this scenario, if text responses are allowed, we return text from the most recent model
                     # response, if any
```
pydantic_ai/_parts_manager.py

```diff
@@ -72,6 +72,7 @@ class ModelResponsePartsManager:
         vendor_part_id: VendorId | None,
         content: str,
         thinking_tags: tuple[str, str] | None = None,
+        ignore_leading_whitespace: bool = False,
     ) -> ModelResponseStreamEvent | None:
         """Handle incoming text content, creating or updating a TextPart in the manager as appropriate.
 
@@ -85,6 +86,7 @@
                 a TextPart.
             content: The text content to append to the appropriate TextPart.
             thinking_tags: If provided, will handle content between the thinking tags as thinking parts.
+            ignore_leading_whitespace: If True, will ignore leading whitespace in the content.
 
         Returns:
             - A `PartStartEvent` if a new part was created.
@@ -128,10 +130,9 @@
                     return self.handle_thinking_delta(vendor_part_id=vendor_part_id, content='')
 
         if existing_text_part_and_index is None:
-            # If the first text delta is all whitespace, don't emit a new part yet.
-            # This is a workaround for models that emit `<think>\n</think>\n\n` ahead of tool calls (e.g. Ollama + Qwen3),
-            # which we don't want to end up treating as a final result.
-            if content.isspace():
+            # This is a workaround for models that emit `<think>\n</think>\n\n` or an empty text part ahead of tool calls (e.g. Ollama + Qwen3),
+            # which we don't want to end up treating as a final result when using `run_stream` with `str` a valid `output_type`.
+            if ignore_leading_whitespace and (len(content) == 0 or content.isspace()):
                 return None
 
             # There is no existing text part that should be updated, so create a new one
```
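`handle_text_delta` is an internal API, but the new opt-in flag is easy to picture from a streaming adapter's side. A sketch with a hypothetical chunk sequence mimicking the Ollama + Qwen3 case the comment describes:

```python
from pydantic_ai._parts_manager import ModelResponsePartsManager  # internal API

manager = ModelResponsePartsManager()

for chunk in ['\n\n', 'Final answer: ', '42']:
    event = manager.handle_text_delta(
        vendor_part_id='content',
        content=chunk,
        ignore_leading_whitespace=True,  # the whitespace-only first delta yields no event
    )
    if event is not None:
        print(event)  # PartStartEvent for 'Final answer: ', then a delta event for '42'
```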
pydantic_ai/_tool_manager.py

```diff
@@ -5,6 +5,7 @@ from collections.abc import Iterable
 from dataclasses import dataclass, field, replace
 from typing import Any, Generic
 
+from opentelemetry.trace import Tracer
 from pydantic import ValidationError
 from typing_extensions import assert_never
 
@@ -21,41 +22,46 @@ from .toolsets.abstract import AbstractToolset, ToolsetTool
 class ToolManager(Generic[AgentDepsT]):
     """Manages tools for an agent run step. It caches the agent run's toolset's tool definitions and handles calling tools and retries."""
 
-    ctx: RunContext[AgentDepsT]
-    """The agent run context for a specific run step."""
     toolset: AbstractToolset[AgentDepsT]
     """The toolset that provides the tools for this run step."""
-    tools: dict[str, ToolsetTool[AgentDepsT]]
+    ctx: RunContext[AgentDepsT] | None = None
+    """The agent run context for a specific run step."""
+    tools: dict[str, ToolsetTool[AgentDepsT]] | None = None
     """The cached tools for this run step."""
     failed_tools: set[str] = field(default_factory=set)
    """Names of tools that failed in this run step."""
 
-    @classmethod
-    async def build(cls, toolset: AbstractToolset[AgentDepsT], ctx: RunContext[AgentDepsT]) -> ToolManager[AgentDepsT]:
-        """Build a new tool manager for a specific run step."""
-        return cls(
-            ctx=ctx,
-            toolset=toolset,
-            tools=await toolset.get_tools(ctx),
-        )
-
     async def for_run_step(self, ctx: RunContext[AgentDepsT]) -> ToolManager[AgentDepsT]:
         """Build a new tool manager for the next run step, carrying over the retries from the current run step."""
-        if ctx.run_step == self.ctx.run_step:
-            return self
-
-        retries = {
-            failed_tool_name: self.ctx.retries.get(failed_tool_name, 0) + 1 for failed_tool_name in self.failed_tools
-        }
-        return await self.__class__.build(self.toolset, replace(ctx, retries=retries))
+        if self.ctx is not None:
+            if ctx.run_step == self.ctx.run_step:
+                return self
+
+            retries = {
+                failed_tool_name: self.ctx.retries.get(failed_tool_name, 0) + 1
+                for failed_tool_name in self.failed_tools
+            }
+            ctx = replace(ctx, retries=retries)
+
+        return self.__class__(
+            toolset=self.toolset,
+            ctx=ctx,
+            tools=await self.toolset.get_tools(ctx),
+        )
 
     @property
     def tool_defs(self) -> list[ToolDefinition]:
         """The tool definitions for the tools in this tool manager."""
+        if self.tools is None:
+            raise ValueError('ToolManager has not been prepared for a run step yet')  # pragma: no cover
+
         return [tool.tool_def for tool in self.tools.values()]
 
     def get_tool_def(self, name: str) -> ToolDefinition | None:
         """Get the tool definition for a given tool name, or `None` if the tool is unknown."""
+        if self.tools is None:
+            raise ValueError('ToolManager has not been prepared for a run step yet')  # pragma: no cover
+
         try:
             return self.tools[name].tool_def
         except KeyError:
@@ -71,15 +77,25 @@ class ToolManager(Generic[AgentDepsT]):
             allow_partial: Whether to allow partial validation of the tool arguments.
             wrap_validation_errors: Whether to wrap validation errors in a retry prompt part.
         """
+        if self.tools is None or self.ctx is None:
+            raise ValueError('ToolManager has not been prepared for a run step yet')  # pragma: no cover
+
         if (tool := self.tools.get(call.tool_name)) and tool.tool_def.kind == 'output':
             # Output tool calls are not traced
             return await self._call_tool(call, allow_partial, wrap_validation_errors)
         else:
-            return await self._call_tool_traced(call, allow_partial, wrap_validation_errors)
+            return await self._call_tool_traced(
+                call,
+                allow_partial,
+                wrap_validation_errors,
+                self.ctx.tracer,
+                self.ctx.trace_include_content,
+            )
+
+    async def _call_tool(self, call: ToolCallPart, allow_partial: bool, wrap_validation_errors: bool) -> Any:
+        if self.tools is None or self.ctx is None:
+            raise ValueError('ToolManager has not been prepared for a run step yet')  # pragma: no cover
 
-    async def _call_tool(
-        self, call: ToolCallPart, allow_partial: bool = False, wrap_validation_errors: bool = True
-    ) -> Any:
         name = call.tool_name
         tool = self.tools.get(name)
         try:
@@ -137,14 +153,19 @@ class ToolManager(Generic[AgentDepsT]):
         raise e
 
     async def _call_tool_traced(
-        self, call: ToolCallPart, allow_partial: bool = False, wrap_validation_errors: bool = True
+        self,
+        call: ToolCallPart,
+        allow_partial: bool,
+        wrap_validation_errors: bool,
+        tracer: Tracer,
+        include_content: bool = False,
     ) -> Any:
         """See <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span>."""
         span_attributes = {
             'gen_ai.tool.name': call.tool_name,
             # NOTE: this means `gen_ai.tool.call.id` will be included even if it was generated by pydantic-ai
             'gen_ai.tool.call.id': call.tool_call_id,
-            **({'tool_arguments': call.args_as_json_str()} if self.ctx.trace_include_content else {}),
+            **({'tool_arguments': call.args_as_json_str()} if include_content else {}),
             'logfire.msg': f'running tool: {call.tool_name}',
             # add the JSON schema so these attributes are formatted nicely in Logfire
             'logfire.json_schema': json.dumps(
@@ -156,7 +177,7 @@ class ToolManager(Generic[AgentDepsT]):
                         'tool_arguments': {'type': 'object'},
                         'tool_response': {'type': 'object'},
                     }
-                    if self.ctx.trace_include_content
+                    if include_content
                     else {}
                 ),
                 'gen_ai.tool.name': {},
@@ -165,16 +186,16 @@ class ToolManager(Generic[AgentDepsT]):
                 }
             ),
         }
-        with self.ctx.tracer.start_as_current_span('running tool', attributes=span_attributes) as span:
+        with tracer.start_as_current_span('running tool', attributes=span_attributes) as span:
            try:
                 tool_result = await self._call_tool(call, allow_partial, wrap_validation_errors)
             except ToolRetryError as e:
                 part = e.tool_retry
-                if self.ctx.trace_include_content and span.is_recording():
+                if include_content and span.is_recording():
                     span.set_attribute('tool_response', part.model_response())
                 raise e
 
-            if self.ctx.trace_include_content and span.is_recording():
+            if include_content and span.is_recording():
                 span.set_attribute(
                     'tool_response',
                     tool_result
```
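`ToolManager` thus moves from an async `build()` classmethod to a two-phase lifecycle: cheap synchronous construction (with `ctx` and `tools` left `None`), then `for_run_step()` each step. A sketch of that pattern (internal API; the `prepared` helper is hypothetical):

```python
from pydantic_ai._tool_manager import ToolManager  # internal API
from pydantic_ai.tools import RunContext
from pydantic_ai.toolsets.abstract import AbstractToolset

async def prepared(toolset: AbstractToolset, ctx: RunContext) -> ToolManager:
    manager = ToolManager(toolset)           # phase 1: unprepared, ctx/tools are None
    return await manager.for_run_step(ctx)   # phase 2: fetches tools, raises on name conflicts
```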
pydantic_ai/agent/__init__.py

```diff
@@ -566,6 +566,8 @@ class Agent(AbstractAgent[AgentDepsT, OutputDataT]):
         if output_toolset:
             output_toolset.max_retries = self._max_result_retries
             output_toolset.output_validators = output_validators
+        toolset = self._get_toolset(output_toolset=output_toolset, additional_toolsets=toolsets)
+        tool_manager = ToolManager[AgentDepsT](toolset)
 
         # Build the graph
         graph: Graph[_agent_graph.GraphAgentState, _agent_graph.GraphAgentDeps[AgentDepsT, Any], FinalResult[Any]] = (
@@ -581,6 +583,27 @@ class Agent(AbstractAgent[AgentDepsT, OutputDataT]):
             run_step=0,
         )
 
+        # Merge model settings in order of precedence: run > agent > model
+        merged_settings = merge_model_settings(model_used.settings, self.model_settings)
+        model_settings = merge_model_settings(merged_settings, model_settings)
+        usage_limits = usage_limits or _usage.UsageLimits()
+
+        async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
+            parts = [
+                self._instructions,
+                *[await func.run(run_context) for func in self._instructions_functions],
+            ]
+
+            model_profile = model_used.profile
+            if isinstance(output_schema, _output.PromptedOutputSchema):
+                instructions = output_schema.instructions(model_profile.prompted_output_template)
+                parts.append(instructions)
+
+            parts = [p for p in parts if p]
+            if not parts:
+                return None
+            return '\n\n'.join(parts).strip()
+
         if isinstance(model_used, InstrumentedModel):
             instrumentation_settings = model_used.instrumentation_settings
             tracer = model_used.instrumentation_settings.tracer
@@ -588,81 +611,45 @@ class Agent(AbstractAgent[AgentDepsT, OutputDataT]):
             instrumentation_settings = None
             tracer = NoOpTracer()
 
-        run_context = RunContext[AgentDepsT](
-            deps=deps,
-            model=model_used,
-            usage=usage,
+        graph_deps = _agent_graph.GraphAgentDeps[AgentDepsT, RunOutputDataT](
+            user_deps=deps,
             prompt=user_prompt,
-            messages=state.message_history,
+            new_message_index=new_message_index,
+            model=model_used,
+            model_settings=model_settings,
+            usage_limits=usage_limits,
+            max_result_retries=self._max_result_retries,
+            end_strategy=self.end_strategy,
+            output_schema=output_schema,
+            output_validators=output_validators,
+            history_processors=self.history_processors,
+            builtin_tools=list(self._builtin_tools),
+            tool_manager=tool_manager,
             tracer=tracer,
-            trace_include_content=instrumentation_settings is not None and instrumentation_settings.include_content,
-            run_step=state.run_step,
+            get_instructions=get_instructions,
+            instrumentation_settings=instrumentation_settings,
+        )
+        start_node = _agent_graph.UserPromptNode[AgentDepsT](
+            user_prompt=user_prompt,
+            instructions=self._instructions,
+            instructions_functions=self._instructions_functions,
+            system_prompts=self._system_prompts,
+            system_prompt_functions=self._system_prompt_functions,
+            system_prompt_dynamic_functions=self._system_prompt_dynamic_functions,
         )
 
-        toolset = self._get_toolset(output_toolset=output_toolset, additional_toolsets=toolsets)
-
-        async with toolset:
-            # This will raise errors for any name conflicts
-            tool_manager = await ToolManager[AgentDepsT].build(toolset, run_context)
-
-            # Merge model settings in order of precedence: run > agent > model
-            merged_settings = merge_model_settings(model_used.settings, self.model_settings)
-            model_settings = merge_model_settings(merged_settings, model_settings)
-            usage_limits = usage_limits or _usage.UsageLimits()
-            agent_name = self.name or 'agent'
-            run_span = tracer.start_span(
-                'agent run',
-                attributes={
-                    'model_name': model_used.model_name if model_used else 'no-model',
-                    'agent_name': agent_name,
-                    'logfire.msg': f'{agent_name} run',
-                },
-            )
-
-            async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
-                parts = [
-                    self._instructions,
-                    *[await func.run(run_context) for func in self._instructions_functions],
-                ]
-
-                model_profile = model_used.profile
-                if isinstance(output_schema, _output.PromptedOutputSchema):
-                    instructions = output_schema.instructions(model_profile.prompted_output_template)
-                    parts.append(instructions)
-
-                parts = [p for p in parts if p]
-                if not parts:
-                    return None
-                return '\n\n'.join(parts).strip()
-
-            graph_deps = _agent_graph.GraphAgentDeps[AgentDepsT, RunOutputDataT](
-                user_deps=deps,
-                prompt=user_prompt,
-                new_message_index=new_message_index,
-                model=model_used,
-                model_settings=model_settings,
-                usage_limits=usage_limits,
-                max_result_retries=self._max_result_retries,
-                end_strategy=self.end_strategy,
-                output_schema=output_schema,
-                output_validators=output_validators,
-                history_processors=self.history_processors,
-                builtin_tools=list(self._builtin_tools),
-                tool_manager=tool_manager,
-                tracer=tracer,
-                get_instructions=get_instructions,
-                instrumentation_settings=instrumentation_settings,
-            )
-            start_node = _agent_graph.UserPromptNode[AgentDepsT](
-                user_prompt=user_prompt,
-                instructions=self._instructions,
-                instructions_functions=self._instructions_functions,
-                system_prompts=self._system_prompts,
-                system_prompt_functions=self._system_prompt_functions,
-                system_prompt_dynamic_functions=self._system_prompt_dynamic_functions,
-            )
+        agent_name = self.name or 'agent'
+        run_span = tracer.start_span(
+            'agent run',
+            attributes={
+                'model_name': model_used.model_name if model_used else 'no-model',
+                'agent_name': agent_name,
+                'logfire.msg': f'{agent_name} run',
+            },
+        )
 
-            try:
+        try:
+            async with toolset:
                 async with graph.iter(
                     start_node,
                     state=state,
@@ -682,12 +669,12 @@ class Agent(AbstractAgent[AgentDepsT, OutputDataT]):
                         else json.dumps(InstrumentedModel.serialize_any(final_result.output))
                     ),
                 )
+        finally:
+            try:
+                if instrumentation_settings and run_span.is_recording():
+                    run_span.set_attributes(self._run_span_end_attributes(state, usage, instrumentation_settings))
             finally:
-                try:
-                    if instrumentation_settings and run_span.is_recording():
-                        run_span.set_attributes(self._run_span_end_attributes(state, usage, instrumentation_settings))
-                finally:
-                    run_span.end()
+                run_span.end()
 
     def _run_span_end_attributes(
         self, state: _agent_graph.GraphAgentState, usage: _usage.Usage, settings: InstrumentationSettings
```
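One caller-visible detail in the relocated merge: settings precedence is run > agent > model, as the comment states. A small sketch of the agent/run layers (model-level settings, where a `Model` is constructed with its own defaults, merge below both):

```python
from pydantic_ai import Agent
from pydantic_ai.settings import ModelSettings

agent = Agent('test', model_settings=ModelSettings(temperature=0.5, max_tokens=256))

# Run-level settings win where keys overlap: temperature becomes 0.0 here,
# while max_tokens=256 carries over from the agent-level defaults.
result = agent.run_sync('Hi', model_settings=ModelSettings(temperature=0.0))
```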
pydantic_ai/models/__init__.py

```diff
@@ -194,6 +194,13 @@ KnownModelName = TypeAliasType(
         'gpt-4o-mini-search-preview-2025-03-11',
         'gpt-4o-search-preview',
         'gpt-4o-search-preview-2025-03-11',
+        'gpt-5',
+        'gpt-5-2025-08-07',
+        'gpt-5-chat-latest',
+        'gpt-5-mini',
+        'gpt-5-mini-2025-08-07',
+        'gpt-5-nano',
+        'gpt-5-nano-2025-08-07',
         'grok:grok-4',
         'grok:grok-4-0709',
         'grok:grok-3',
@@ -313,11 +320,18 @@ KnownModelName = TypeAliasType(
         'openai:gpt-4o-mini-search-preview-2025-03-11',
         'openai:gpt-4o-search-preview',
         'openai:gpt-4o-search-preview-2025-03-11',
+        'openai:gpt-5',
+        'openai:gpt-5-2025-08-07',
         'openai:o1',
+        'openai:gpt-5-chat-latest',
         'openai:o1-2024-12-17',
+        'openai:gpt-5-mini',
         'openai:o1-mini',
+        'openai:gpt-5-mini-2025-08-07',
         'openai:o1-mini-2024-09-12',
+        'openai:gpt-5-nano',
         'openai:o1-preview',
+        'openai:gpt-5-nano-2025-08-07',
         'openai:o1-preview-2024-09-12',
         'openai:o1-pro',
         'openai:o1-pro-2025-03-19',
```
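With these entries the GPT-5 family now type-checks as `KnownModelName`:

```python
from pydantic_ai import Agent

agent = Agent('openai:gpt-5')  # now a KnownModelName; requires OPENAI_API_KEY at run time
```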
pydantic_ai/models/__init__.py (continued)

```diff
@@ -399,6 +413,16 @@ class Model(ABC):
         """Make a request to the model."""
         raise NotImplementedError()
 
+    async def count_tokens(
+        self,
+        messages: list[ModelMessage],
+        model_settings: ModelSettings | None,
+        model_request_parameters: ModelRequestParameters,
+    ) -> Usage:
+        """Make a request to the model for counting tokens."""
+        # This method is not required, but you need to implement it if you want to support `UsageLimits.count_tokens_before_request`.
+        raise NotImplementedError(f'Token counting ahead of the request is not supported by {self.__class__.__name__}')
+
     @asynccontextmanager
     async def request_stream(
         self,
```
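Custom `Model` subclasses can opt into pre-flight counting by overriding this hook. A hedged sketch (the other abstract members are elided; the naive word count only demonstrates the contract):

```python
from pydantic_ai.messages import ModelMessage
from pydantic_ai.models import Model, ModelRequestParameters
from pydantic_ai.settings import ModelSettings
from pydantic_ai.usage import Usage

class MyModel(Model):
    # request(), model_name, system, etc. omitted for brevity

    async def count_tokens(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> Usage:
        # Ask the backend for a real count where possible; this rough
        # estimate is illustrative only.
        approx = sum(len(str(m).split()) for m in messages)
        return Usage(request_tokens=approx)
```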
pydantic_ai/models/__init__.py (continued)

```diff
@@ -679,6 +703,10 @@ def infer_model(model: Model | KnownModelName | str) -> Model:  # noqa: C901
         from .openai import OpenAIModel
 
         return OpenAIModel(model_name, provider=provider)
+    elif provider == 'openai-responses':
+        from .openai import OpenAIResponsesModel
+
+        return OpenAIResponsesModel(model_name, provider='openai')
     elif provider in ('google-gla', 'google-vertex'):
         from .google import GoogleModel
```
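So the new `openai-responses:` prefix routes a model name to the Responses API while reusing the standard `openai` provider:

```python
from pydantic_ai.models import infer_model

model = infer_model('openai-responses:gpt-5')  # requires OPENAI_API_KEY
print(type(model).__name__)  # OpenAIResponsesModel
```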