pydantic-ai-slim 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pydantic-ai-slim has been flagged as potentially problematic.

pydantic_ai/agent.py CHANGED
@@ -7,7 +7,7 @@ from collections.abc import AsyncIterator, Awaitable, Iterator, Sequence
  from contextlib import asynccontextmanager, contextmanager
  from dataclasses import dataclass, field
  from types import FrameType
- from typing import Any, Callable, Generic, cast, final, overload
+ from typing import Any, Callable, Generic, Literal, cast, final, overload

  import logfire_api
  from typing_extensions import assert_never
@@ -22,6 +22,7 @@ from . import (
  result,
  )
  from .result import ResultData
+ from .settings import ModelSettings, UsageLimits, merge_model_settings
  from .tools import (
  AgentDeps,
  RunContext,
@@ -39,6 +40,12 @@ __all__ = ('Agent',)
  _logfire = logfire_api.Logfire(otel_scope='pydantic-ai')

  NoneType = type(None)
+ EndStrategy = Literal['early', 'exhaustive']
+ """The strategy for handling multiple tool calls when a final result is found.
+
+ - `'early'`: Stop processing other tool calls once a final result is found
+ - `'exhaustive'`: Process all tool calls even after finding a final result
+ """


  @final
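
The `end_strategy` introduced here is threaded through to `_process_function_tools` (see the hunk for that method further down), which either runs or stubs out tool calls that arrive alongside a final result. A minimal sketch of opting into the exhaustive behaviour, using only the constructor signature shown in this diff:

```python
from pydantic_ai import Agent

# 'exhaustive' runs every tool call the model requested even after a final
# result is found; the default 'early' stubs the remaining calls out with
# 'Tool not executed - a final result was already processed.'
agent = Agent('openai:gpt-4o', end_strategy='exhaustive')
```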
@@ -53,7 +60,7 @@ class Agent(Generic[AgentDeps, ResultData]):

  Minimal usage example:

- ```py
+ ```python
  from pydantic_ai import Agent

  agent = Agent('openai:gpt-4o')
@@ -63,14 +70,31 @@ class Agent(Generic[AgentDeps, ResultData]):
  ```
  """

- # dataclass fields mostly for my sanity knowing what attributes are available
+ # we use dataclass fields in order to conveniently know what attributes are available
  model: models.Model | models.KnownModelName | None
  """The default model configured for this agent."""
+
  name: str | None
  """The name of the agent, used for logging.

  If `None`, we try to infer the agent name from the call frame when the agent is first run.
  """
+ end_strategy: EndStrategy
+ """Strategy for handling tool calls when a final result is found."""
+
+ model_settings: ModelSettings | None
+ """Optional model request settings to use for this agents's runs, by default.
+
+ Note, if `model_settings` is provided by `run`, `run_sync`, or `run_stream`, those settings will
+ be merged with this value, with the runtime argument taking priority.
+ """
+
+ last_run_messages: list[_messages.ModelMessage] | None
+ """The messages from the last run, useful when a run raised an exception.
+
+ Note: these are not used by the agent, e.g. in future runs, they are just stored for developers' convenience.
+ """
+
  _result_schema: _result.ResultSchema[ResultData] | None = field(repr=False)
  _result_validators: list[_result.ResultValidator[AgentDeps, ResultData]] = field(repr=False)
  _allow_text_result: bool = field(repr=False)
@@ -80,14 +104,8 @@ class Agent(Generic[AgentDeps, ResultData]):
  _system_prompt_functions: list[_system_prompt.SystemPromptRunner[AgentDeps]] = field(repr=False)
  _deps_type: type[AgentDeps] = field(repr=False)
  _max_result_retries: int = field(repr=False)
- _current_result_retry: int = field(repr=False)
  _override_deps: _utils.Option[AgentDeps] = field(default=None, repr=False)
  _override_model: _utils.Option[models.Model] = field(default=None, repr=False)
- last_run_messages: list[_messages.Message] | None = None
- """The messages from the last run, useful when a run raised an exception.
-
- Note: these are not used by the agent, e.g. in future runs, they are just stored for developers' convenience.
- """

  def __init__(
  self,
@@ -97,18 +115,20 @@ class Agent(Generic[AgentDeps, ResultData]):
  system_prompt: str | Sequence[str] = (),
  deps_type: type[AgentDeps] = NoneType,
  name: str | None = None,
+ model_settings: ModelSettings | None = None,
  retries: int = 1,
  result_tool_name: str = 'final_result',
  result_tool_description: str | None = None,
  result_retries: int | None = None,
  tools: Sequence[Tool[AgentDeps] | ToolFuncEither[AgentDeps, ...]] = (),
  defer_model_check: bool = False,
+ end_strategy: EndStrategy = 'early',
  ):
  """Create an agent.

  Args:
  model: The default model to use for this agent, if not provide,
- you must provide the model when calling the agent.
+ you must provide the model when calling it.
  result_type: The type of the result data, used to validate the result data, defaults to `str`.
  system_prompt: Static system prompts to use for this agent, you can also register system
  prompts via a function with [`system_prompt`][pydantic_ai.Agent.system_prompt].
@@ -118,6 +138,7 @@ class Agent(Generic[AgentDeps, ResultData]):
  or add a type hint `: Agent[None, <return type>]`.
  name: The name of the agent, used for logging. If `None`, we try to infer the agent name from the call frame
  when the agent is first run.
+ model_settings: Optional model request settings to use for this agent's runs, by default.
  retries: The default number of retries to allow before raising an error.
  result_tool_name: The name of the tool to use for the final result.
  result_tool_description: The description of the final result tool.
@@ -129,13 +150,18 @@ class Agent(Generic[AgentDeps, ResultData]):
  which checks for the necessary environment variables. Set this to `false`
  to defer the evaluation until the first run. Useful if you want to
  [override the model][pydantic_ai.Agent.override] for testing.
+ end_strategy: Strategy for handling tool calls that are requested alongside a final result.
+ See [`EndStrategy`][pydantic_ai.agent.EndStrategy] for more information.
  """
  if model is None or defer_model_check:
  self.model = model
  else:
  self.model = models.infer_model(model)

+ self.end_strategy = end_strategy
  self.name = name
+ self.model_settings = model_settings
+ self.last_run_messages = None
  self._result_schema = _result.ResultSchema[result_type].build(
  result_type, result_tool_name, result_tool_description
  )
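
With these constructor changes, request settings can be set once on the agent and overridden per run; `merge_model_settings` in the run loop below gives the runtime argument priority. A sketch, assuming `ModelSettings` accepts common sampling options such as `temperature` (its fields are not shown in this diff):

```python
from pydantic_ai import Agent
from pydantic_ai.settings import ModelSettings  # public path assumed from the `.settings` import above

# agent-level default settings
agent = Agent('openai:gpt-4o', model_settings=ModelSettings(temperature=0.0))

# per-run settings are merged over the agent-level ones, with the run winning
result = agent.run_sync(
    'What is the capital of Italy?',
    model_settings=ModelSettings(temperature=1.0),
)
```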
@@ -153,25 +179,39 @@ class Agent(Generic[AgentDeps, ResultData]):
  self._deps_type = deps_type
  self._system_prompt_functions = []
  self._max_result_retries = result_retries if result_retries is not None else retries
- self._current_result_retry = 0
  self._result_validators = []

  async def run(
  self,
  user_prompt: str,
  *,
- message_history: list[_messages.Message] | None = None,
+ message_history: list[_messages.ModelMessage] | None = None,
  model: models.Model | models.KnownModelName | None = None,
  deps: AgentDeps = None,
+ model_settings: ModelSettings | None = None,
+ usage_limits: UsageLimits | None = None,
  infer_name: bool = True,
  ) -> result.RunResult[ResultData]:
  """Run the agent with a user prompt in async mode.

+ Example:
+ ```python
+ from pydantic_ai import Agent
+
+ agent = Agent('openai:gpt-4o')
+
+ result_sync = agent.run_sync('What is the capital of Italy?')
+ print(result_sync.data)
+ #> Rome
+ ```
+
  Args:
  user_prompt: User input to start/continue the conversation.
  message_history: History of the conversation so far.
  model: Optional model to use for this run, required if `model` was not set when creating the agent.
  deps: Optional dependencies to use for this run.
+ model_settings: Optional settings to use for this model's request.
+ usage_limits: Optional limits on model request count or token usage.
  infer_name: Whether to try to infer the agent name from the call frame if it's not set.

  Returns:
@@ -182,6 +222,7 @@ class Agent(Generic[AgentDeps, ResultData]):
  model_used, mode_selection = await self._get_model(model)

  deps = self._get_deps(deps)
+ new_message_index = len(message_history) if message_history else 0

  with _logfire.span(
  '{agent_name} run {prompt=}',
@@ -191,67 +232,91 @@ class Agent(Generic[AgentDeps, ResultData]):
  model_name=model_used.name(),
  agent_name=self.name or 'agent',
  ) as run_span:
- new_message_index, messages = await self._prepare_messages(deps, user_prompt, message_history)
- self.last_run_messages = messages
+ run_context = RunContext(deps, 0, [], None, model_used)
+ messages = await self._prepare_messages(user_prompt, message_history, run_context)
+ self.last_run_messages = run_context.messages = messages

  for tool in self._function_tools.values():
  tool.current_retry = 0

- cost = result.Cost()
+ usage = result.Usage(requests=0)
+ model_settings = merge_model_settings(self.model_settings, model_settings)
+ usage_limits = usage_limits or UsageLimits()

  run_step = 0
  while True:
+ usage_limits.check_before_request(usage)
+
  run_step += 1
  with _logfire.span('preparing model and tools {run_step=}', run_step=run_step):
- agent_model = await self._prepare_model(model_used, deps)
+ agent_model = await self._prepare_model(run_context)

  with _logfire.span('model request', run_step=run_step) as model_req_span:
- model_response, request_cost = await agent_model.request(messages)
+ model_response, request_usage = await agent_model.request(messages, model_settings)
  model_req_span.set_attribute('response', model_response)
- model_req_span.set_attribute('cost', request_cost)
- model_req_span.message = f'model request -> {model_response.role}'
+ model_req_span.set_attribute('usage', request_usage)

  messages.append(model_response)
- cost += request_cost
+ usage += request_usage
+ usage.requests += 1
+ usage_limits.check_tokens(request_usage)

  with _logfire.span('handle model response', run_step=run_step) as handle_span:
- final_result, response_messages = await self._handle_model_response(model_response, deps)
+ final_result, tool_responses = await self._handle_model_response(model_response, run_context)

- # Add all messages to the conversation
- messages.extend(response_messages)
+ if tool_responses:
+ # Add parts to the conversation as a new message
+ messages.append(_messages.ModelRequest(tool_responses))

  # Check if we got a final result
  if final_result is not None:
  result_data = final_result.data
  run_span.set_attribute('all_messages', messages)
- run_span.set_attribute('cost', cost)
+ run_span.set_attribute('usage', usage)
  handle_span.set_attribute('result', result_data)
  handle_span.message = 'handle model response -> final result'
- return result.RunResult(messages, new_message_index, result_data, cost)
+ return result.RunResult(messages, new_message_index, result_data, usage)
  else:
  # continue the conversation
- handle_span.set_attribute('tool_responses', response_messages)
- response_msgs = ' '.join(r.role for r in response_messages)
- handle_span.message = f'handle model response -> {response_msgs}'
+ handle_span.set_attribute('tool_responses', tool_responses)
+ tool_responses_str = ' '.join(r.part_kind for r in tool_responses)
+ handle_span.message = f'handle model response -> {tool_responses_str}'

  def run_sync(
  self,
  user_prompt: str,
  *,
- message_history: list[_messages.Message] | None = None,
+ message_history: list[_messages.ModelMessage] | None = None,
  model: models.Model | models.KnownModelName | None = None,
  deps: AgentDeps = None,
+ model_settings: ModelSettings | None = None,
+ usage_limits: UsageLimits | None = None,
  infer_name: bool = True,
  ) -> result.RunResult[ResultData]:
  """Run the agent with a user prompt synchronously.

- This is a convenience method that wraps `self.run` with `loop.run_until_complete()`.
+ This is a convenience method that wraps [`self.run`][pydantic_ai.Agent.run] with `loop.run_until_complete(...)`.
+ You therefore can't use this method inside async code or if there's an active event loop.
+
+ Example:
+ ```python
+ from pydantic_ai import Agent
+
+ agent = Agent('openai:gpt-4o')
+
+ async def main():
+ result = await agent.run('What is the capital of France?')
+ print(result.data)
+ #> Paris
+ ```

  Args:
  user_prompt: User input to start/continue the conversation.
  message_history: History of the conversation so far.
  model: Optional model to use for this run, required if `model` was not set when creating the agent.
  deps: Optional dependencies to use for this run.
+ model_settings: Optional settings to use for this model's request.
+ usage_limits: Optional limits on model request count or token usage.
  infer_name: Whether to try to infer the agent name from the call frame if it's not set.

  Returns:
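
The run loop above now calls `usage_limits.check_before_request(usage)` before each model request and `usage_limits.check_tokens(request_usage)` after it, so a runaway run fails fast instead of looping. A sketch of passing limits to a run; `request_limit` is an assumed field name, since this diff shows only the `UsageLimits` methods, not its fields:

```python
from pydantic_ai import Agent
from pydantic_ai.settings import UsageLimits  # import path taken from the diff

agent = Agent('openai:gpt-4o')

async def main():
    # check_before_request() should raise once the (assumed) request_limit
    # is exceeded, ending the run rather than letting it retry indefinitely
    result = await agent.run(
        'What is the capital of Italy?',
        usage_limits=UsageLimits(request_limit=3),
    )
    print(result.data)
```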
@@ -259,9 +324,16 @@ class Agent(Generic[AgentDeps, ResultData]):
  """
  if infer_name and self.name is None:
  self._infer_name(inspect.currentframe())
- loop = asyncio.get_event_loop()
- return loop.run_until_complete(
- self.run(user_prompt, message_history=message_history, model=model, deps=deps, infer_name=False)
+ return asyncio.get_event_loop().run_until_complete(
+ self.run(
+ user_prompt,
+ message_history=message_history,
+ model=model,
+ deps=deps,
+ model_settings=model_settings,
+ usage_limits=usage_limits,
+ infer_name=False,
+ )
  )

  @asynccontextmanager
@@ -269,18 +341,34 @@ class Agent(Generic[AgentDeps, ResultData]):
  self,
  user_prompt: str,
  *,
- message_history: list[_messages.Message] | None = None,
+ message_history: list[_messages.ModelMessage] | None = None,
  model: models.Model | models.KnownModelName | None = None,
  deps: AgentDeps = None,
+ model_settings: ModelSettings | None = None,
+ usage_limits: UsageLimits | None = None,
  infer_name: bool = True,
  ) -> AsyncIterator[result.StreamedRunResult[AgentDeps, ResultData]]:
  """Run the agent with a user prompt in async mode, returning a streamed response.

+ Example:
+ ```python
+ from pydantic_ai import Agent
+
+ agent = Agent('openai:gpt-4o')
+
+ async def main():
+ async with agent.run_stream('What is the capital of the UK?') as response:
+ print(await response.get_data())
+ #> London
+ ```
+
  Args:
  user_prompt: User input to start/continue the conversation.
  message_history: History of the conversation so far.
  model: Optional model to use for this run, required if `model` was not set when creating the agent.
  deps: Optional dependencies to use for this run.
+ model_settings: Optional settings to use for this model's request.
+ usage_limits: Optional limits on model request count or token usage.
  infer_name: Whether to try to infer the agent name from the call frame if it's not set.

  Returns:
@@ -293,6 +381,7 @@ class Agent(Generic[AgentDeps, ResultData]):
  model_used, mode_selection = await self._get_model(model)

  deps = self._get_deps(deps)
+ new_message_index = len(message_history) if message_history else 0

  with _logfire.span(
  '{agent_name} run stream {prompt=}',
@@ -302,60 +391,89 @@ class Agent(Generic[AgentDeps, ResultData]):
  model_name=model_used.name(),
  agent_name=self.name or 'agent',
  ) as run_span:
- new_message_index, messages = await self._prepare_messages(deps, user_prompt, message_history)
- self.last_run_messages = messages
+ run_context = RunContext(deps, 0, [], None, model_used)
+ messages = await self._prepare_messages(user_prompt, message_history, run_context)
+ self.last_run_messages = run_context.messages = messages

  for tool in self._function_tools.values():
  tool.current_retry = 0

- cost = result.Cost()
+ usage = result.Usage()
+ model_settings = merge_model_settings(self.model_settings, model_settings)
+ usage_limits = usage_limits or UsageLimits()

  run_step = 0
  while True:
  run_step += 1
+ usage_limits.check_before_request(usage)

  with _logfire.span('preparing model and tools {run_step=}', run_step=run_step):
- agent_model = await self._prepare_model(model_used, deps)
+ agent_model = await self._prepare_model(run_context)

  with _logfire.span('model request {run_step=}', run_step=run_step) as model_req_span:
- async with agent_model.request_stream(messages) as model_response:
+ async with agent_model.request_stream(messages, model_settings) as model_response:
+ usage.requests += 1
  model_req_span.set_attribute('response_type', model_response.__class__.__name__)
  # We want to end the "model request" span here, but we can't exit the context manager
  # in the traditional way
  model_req_span.__exit__(None, None, None)

  with _logfire.span('handle model response') as handle_span:
- final_result, response_messages = await self._handle_streamed_model_response(
- model_response, deps
- )
-
- # Add all messages to the conversation
- messages.extend(response_messages)
+ maybe_final_result = await self._handle_streamed_model_response(model_response, run_context)

  # Check if we got a final result
- if final_result is not None:
- result_stream = final_result.data
- run_span.set_attribute('all_messages', messages)
- handle_span.set_attribute('result_type', result_stream.__class__.__name__)
+ if isinstance(maybe_final_result, _MarkFinalResult):
+ result_stream = maybe_final_result.data
+ result_tool_name = maybe_final_result.tool_name
  handle_span.message = 'handle model response -> final result'
+
+ async def on_complete():
+ """Called when the stream has completed.
+
+ The model response will have been added to messages by now
+ by `StreamedRunResult._marked_completed`.
+ """
+ last_message = messages[-1]
+ assert isinstance(last_message, _messages.ModelResponse)
+ tool_calls = [
+ part for part in last_message.parts if isinstance(part, _messages.ToolCallPart)
+ ]
+ parts = await self._process_function_tools(
+ tool_calls, result_tool_name, run_context
+ )
+ if parts:
+ messages.append(_messages.ModelRequest(parts))
+ run_span.set_attribute('all_messages', messages)
+
  yield result.StreamedRunResult(
  messages,
  new_message_index,
- cost,
+ usage,
+ usage_limits,
  result_stream,
  self._result_schema,
- deps,
+ run_context,
  self._result_validators,
- lambda m: run_span.set_attribute('all_messages', messages),
+ result_tool_name,
+ on_complete,
  )
  return
  else:
  # continue the conversation
- handle_span.set_attribute('tool_responses', response_messages)
- response_msgs = ' '.join(r.role for r in response_messages)
- handle_span.message = f'handle model response -> {response_msgs}'
- # the model_response should have been fully streamed by now, we can add it's cost
- cost += model_response.cost()
+ model_response_msg, tool_responses = maybe_final_result
+ # if we got a model response add that to messages
+ messages.append(model_response_msg)
+ if tool_responses:
+ # if we got one or more tool response parts, add a model request message
+ messages.append(_messages.ModelRequest(tool_responses))
+
+ handle_span.set_attribute('tool_responses', tool_responses)
+ tool_responses_str = ' '.join(r.part_kind for r in tool_responses)
+ handle_span.message = f'handle model response -> {tool_responses_str}'
+ # the model_response should have been fully streamed by now, we can add its usage
+ model_response_usage = model_response.usage()
+ usage += model_response_usage
+ usage_limits.check_tokens(usage)

  @contextmanager
  def override(
@@ -367,6 +485,7 @@ class Agent(Generic[AgentDeps, ResultData]):
  """Context manager to temporarily override agent dependencies and model.

  This is particularly useful when testing.
+ You can find an example of this [here](../testing-evals.md#overriding-model-via-pytest-fixtures).

  Args:
  deps: The dependencies to use instead of the dependencies passed to the agent run.
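
A sketch of the testing pattern this links to, assuming the test model lives at `pydantic_ai.models.test` (not shown in this diff):

```python
from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel  # assumed location of the test model

agent = Agent('openai:gpt-4o')

def test_agent():
    # temporarily replace the real model so no API request is made
    with agent.override(model=TestModel()):
        result = agent.run_sync('What is 2 + 2?')
        assert result.data is not None
```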
@@ -415,14 +534,14 @@ class Agent(Generic[AgentDeps, ResultData]):
  ) -> _system_prompt.SystemPromptFunc[AgentDeps]:
  """Decorator to register a system prompt function.

- Optionally takes [`RunContext`][pydantic_ai.tools.RunContext] as it's only argument.
+ Optionally takes [`RunContext`][pydantic_ai.tools.RunContext] as its only argument.
  Can decorate a sync or async functions.

  Overloads for every possible signature of `system_prompt` are included so the decorator doesn't obscure
  the type of the function, see `tests/typed_agent.py` for tests.

  Example:
- ```py
+ ```python
  from pydantic_ai import Agent, RunContext

  agent = Agent('test', deps_type=str)
@@ -466,14 +585,14 @@ class Agent(Generic[AgentDeps, ResultData]):
  ) -> _result.ResultValidatorFunc[AgentDeps, ResultData]:
  """Decorator to register a result validator function.

- Optionally takes [`RunContext`][pydantic_ai.tools.RunContext] as it's first argument.
+ Optionally takes [`RunContext`][pydantic_ai.tools.RunContext] as its first argument.
  Can decorate a sync or async functions.

  Overloads for every possible signature of `result_validator` are included so the decorator doesn't obscure
  the type of the function, see `tests/typed_agent.py` for tests.

  Example:
- ```py
+ ```python
  from pydantic_ai import Agent, ModelRetry, RunContext

  agent = Agent('test', deps_type=str)
@@ -523,13 +642,13 @@ class Agent(Generic[AgentDeps, ResultData]):
  Can decorate a sync or async functions.

  The docstring is inspected to extract both the tool description and description of each parameter,
- [learn more](../agents.md#function-tools-and-schema).
+ [learn more](../tools.md#function-tools-and-schema).

  We can't add overloads for every possible signature of tool, since the return type is a recursive union
  so the signature of functions decorated with `@agent.tool` is obscured.

  Example:
- ```py
+ ```python
  from pydantic_ai import Agent, RunContext

  agent = Agent('test', deps_type=int)
@@ -595,13 +714,13 @@ class Agent(Generic[AgentDeps, ResultData]):
  Can decorate a sync or async functions.

  The docstring is inspected to extract both the tool description and description of each parameter,
- [learn more](../agents.md#function-tools-and-schema).
+ [learn more](../tools.md#function-tools-and-schema).

  We can't add overloads for every possible signature of tool, since the return type is a recursive union
  so the signature of functions decorated with `@agent.tool` is obscured.

  Example:
- ```py
+ ```python
  from pydantic_ai import Agent, RunContext

  agent = Agent('test')
@@ -696,193 +815,266 @@ class Agent(Generic[AgentDeps, ResultData]):

  return model_, mode_selection

- async def _prepare_model(self, model: models.Model, deps: AgentDeps) -> models.AgentModel:
- """Create building tools and create an agent model."""
+ async def _prepare_model(self, run_context: RunContext[AgentDeps]) -> models.AgentModel:
+ """Build tools and create an agent model."""
  function_tools: list[ToolDefinition] = []

  async def add_tool(tool: Tool[AgentDeps]) -> None:
- ctx = RunContext(deps, tool.current_retry, tool.name)
+ ctx = run_context.replace_with(retry=tool.current_retry, tool_name=tool.name)
  if tool_def := await tool.prepare_tool_def(ctx):
  function_tools.append(tool_def)

  await asyncio.gather(*map(add_tool, self._function_tools.values()))

- return await model.agent_model(
+ return await run_context.model.agent_model(
  function_tools=function_tools,
  allow_text_result=self._allow_text_result,
  result_tools=self._result_schema.tool_defs() if self._result_schema is not None else [],
  )

  async def _prepare_messages(
- self, deps: AgentDeps, user_prompt: str, message_history: list[_messages.Message] | None
- ) -> tuple[int, list[_messages.Message]]:
- # if message history includes system prompts, we don't want to regenerate them
- if message_history and any(m.role == 'system' for m in message_history):
+ self, user_prompt: str, message_history: list[_messages.ModelMessage] | None, run_context: RunContext[AgentDeps]
+ ) -> list[_messages.ModelMessage]:
+ if message_history:
  # shallow copy messages
  messages = message_history.copy()
+ messages.append(_messages.ModelRequest([_messages.UserPromptPart(user_prompt)]))
  else:
- messages = await self._init_messages(deps)
- if message_history:
- messages += message_history
+ parts = await self._sys_parts(run_context)
+ parts.append(_messages.UserPromptPart(user_prompt))
+ messages: list[_messages.ModelMessage] = [_messages.ModelRequest(parts)]

- new_message_index = len(messages)
- messages.append(_messages.UserPrompt(user_prompt))
- return new_message_index, messages
+ return messages

  async def _handle_model_response(
- self, model_response: _messages.ModelAnyResponse, deps: AgentDeps
- ) -> tuple[_MarkFinalResult[ResultData] | None, list[_messages.Message]]:
+ self, model_response: _messages.ModelResponse, run_context: RunContext[AgentDeps]
+ ) -> tuple[_MarkFinalResult[ResultData] | None, list[_messages.ModelRequestPart]]:
  """Process a non-streamed response from the model.

  Returns:
- A tuple of `(final_result, messages)`. If `final_result` is not `None`, the conversation should end.
+ A tuple of `(final_result, request parts)`. If `final_result` is not `None`, the conversation should end.
  """
- if model_response.role == 'model-text-response':
- # plain string response
- if self._allow_text_result:
- result_data_input = cast(ResultData, model_response.content)
+ texts: list[str] = []
+ tool_calls: list[_messages.ToolCallPart] = []
+ for part in model_response.parts:
+ if isinstance(part, _messages.TextPart):
+ # ignore empty content for text parts, see #437
+ if part.content:
+ texts.append(part.content)
+ else:
+ tool_calls.append(part)
+
+ if texts:
+ text = '\n\n'.join(texts)
+ return await self._handle_text_response(text, run_context)
+ elif tool_calls:
+ return await self._handle_structured_response(tool_calls, run_context)
+ else:
+ raise exceptions.UnexpectedModelBehavior('Received empty model response')
+
+ async def _handle_text_response(
+ self, text: str, run_context: RunContext[AgentDeps]
+ ) -> tuple[_MarkFinalResult[ResultData] | None, list[_messages.ModelRequestPart]]:
+ """Handle a plain text response from the model for non-streaming responses."""
+ if self._allow_text_result:
+ result_data_input = cast(ResultData, text)
+ try:
+ result_data = await self._validate_result(result_data_input, run_context, None)
+ except _result.ToolRetryError as e:
+ self._incr_result_retry(run_context)
+ return None, [e.tool_retry]
+ else:
+ return _MarkFinalResult(result_data, None), []
+ else:
+ self._incr_result_retry(run_context)
+ response = _messages.RetryPromptPart(
+ content='Plain text responses are not permitted, please call one of the functions instead.',
+ )
+ return None, [response]
+
+ async def _handle_structured_response(
+ self, tool_calls: list[_messages.ToolCallPart], run_context: RunContext[AgentDeps]
+ ) -> tuple[_MarkFinalResult[ResultData] | None, list[_messages.ModelRequestPart]]:
+ """Handle a structured response containing tool calls from the model for non-streaming responses."""
+ assert tool_calls, 'Expected at least one tool call'
+
+ # first look for the result tool call
+ final_result: _MarkFinalResult[ResultData] | None = None
+
+ parts: list[_messages.ModelRequestPart] = []
+ if result_schema := self._result_schema:
+ if match := result_schema.find_tool(tool_calls):
+ call, result_tool = match
  try:
- result_data = await self._validate_result(result_data_input, deps, None)
+ result_data = result_tool.validate(call)
+ result_data = await self._validate_result(result_data, run_context, call)
  except _result.ToolRetryError as e:
- self._incr_result_retry()
- return None, [e.tool_retry]
+ self._incr_result_retry(run_context)
+ parts.append(e.tool_retry)
  else:
- return _MarkFinalResult(result_data), []
- else:
- self._incr_result_retry()
- response = _messages.RetryPrompt(
- content='Plain text responses are not permitted, please call one of the functions instead.',
+ final_result = _MarkFinalResult(result_data, call.tool_name)
+
+ # Then build the other request parts based on end strategy
+ parts += await self._process_function_tools(tool_calls, final_result and final_result.tool_name, run_context)
+
+ return final_result, parts
+
+ async def _process_function_tools(
+ self,
+ tool_calls: list[_messages.ToolCallPart],
+ result_tool_name: str | None,
+ run_context: RunContext[AgentDeps],
+ ) -> list[_messages.ModelRequestPart]:
+ """Process function (non-result) tool calls in parallel.
+
+ Also add stub return parts for any other tools that need it.
+ """
+ parts: list[_messages.ModelRequestPart] = []
+ tasks: list[asyncio.Task[_messages.ModelRequestPart]] = []
+
+ stub_function_tools = bool(result_tool_name) and self.end_strategy == 'early'
+
+ # we rely on the fact that if we found a result, it's the first result tool in the last
+ found_used_result_tool = False
+ for call in tool_calls:
+ if call.tool_name == result_tool_name and not found_used_result_tool:
+ found_used_result_tool = True
+ parts.append(
+ _messages.ToolReturnPart(
+ tool_name=call.tool_name,
+ content='Final result processed.',
+ tool_call_id=call.tool_call_id,
+ )
  )
- return None, [response]
- elif model_response.role == 'model-structured-response':
- if self._result_schema is not None:
- # if there's a result schema, and any of the calls match one of its tools, return the result
- # NOTE: this means we ignore any other tools called here
- if match := self._result_schema.find_tool(model_response):
- call, result_tool = match
- try:
- result_data = result_tool.validate(call)
- result_data = await self._validate_result(result_data, deps, call)
- except _result.ToolRetryError as e:
- self._incr_result_retry()
- return None, [e.tool_retry]
- else:
- # Add a ToolReturn message for the schema tool call
- tool_return = _messages.ToolReturn(
+ elif tool := self._function_tools.get(call.tool_name):
+ if stub_function_tools:
+ parts.append(
+ _messages.ToolReturnPart(
  tool_name=call.tool_name,
- content='Final result processed.',
- tool_id=call.tool_id,
+ content='Tool not executed - a final result was already processed.',
+ tool_call_id=call.tool_call_id,
  )
- return _MarkFinalResult(result_data), [tool_return]
-
- if not model_response.calls:
- raise exceptions.UnexpectedModelBehavior('Received empty tool call message')
-
- # otherwise we run all tool functions in parallel
- messages: list[_messages.Message] = []
- tasks: list[asyncio.Task[_messages.Message]] = []
- for call in model_response.calls:
- if tool := self._function_tools.get(call.tool_name):
- tasks.append(asyncio.create_task(tool.run(deps, call), name=call.tool_name))
+ )
  else:
- messages.append(self._unknown_tool(call.tool_name))
+ tasks.append(asyncio.create_task(tool.run(call, run_context), name=call.tool_name))
+ elif self._result_schema is not None and call.tool_name in self._result_schema.tools:
+ # if tool_name is in _result_schema, it means we found a result tool but an error occurred in
+ # validation, we don't add another part here
+ if result_tool_name is not None:
+ parts.append(
+ _messages.ToolReturnPart(
+ tool_name=call.tool_name,
+ content='Result tool not used - a final result was already processed.',
+ tool_call_id=call.tool_call_id,
+ )
+ )
+ else:
+ parts.append(self._unknown_tool(call.tool_name, run_context))

+ # Run all tool tasks in parallel
+ if tasks:
  with _logfire.span('running {tools=}', tools=[t.get_name() for t in tasks]):
- task_results: Sequence[_messages.Message] = await asyncio.gather(*tasks)
- messages.extend(task_results)
- return None, messages
- else:
- assert_never(model_response)
+ task_results: Sequence[_messages.ModelRequestPart] = await asyncio.gather(*tasks)
+ parts.extend(task_results)
+ return parts

  async def _handle_streamed_model_response(
- self, model_response: models.EitherStreamedResponse, deps: AgentDeps
- ) -> tuple[_MarkFinalResult[models.EitherStreamedResponse] | None, list[_messages.Message]]:
+ self,
+ model_response: models.EitherStreamedResponse,
+ run_context: RunContext[AgentDeps],
+ ) -> (
+ _MarkFinalResult[models.EitherStreamedResponse]
+ | tuple[_messages.ModelResponse, list[_messages.ModelRequestPart]]
+ ):
  """Process a streamed response from the model.

  Returns:
- A tuple of (final_result, messages). If final_result is not None, the conversation should end.
+ Either a final result or a tuple of the model response and the tool responses for the next request.
+ If a final result is returned, the conversation should end.
  """
  if isinstance(model_response, models.StreamTextResponse):
  # plain string response
  if self._allow_text_result:
- return _MarkFinalResult(model_response), []
+ return _MarkFinalResult(model_response, None)
  else:
- self._incr_result_retry()
- response = _messages.RetryPrompt(
+ self._incr_result_retry(run_context)
+ response = _messages.RetryPromptPart(
  content='Plain text responses are not permitted, please call one of the functions instead.',
  )
- # stream the response, so cost is correct
+ # stream the response, so usage is correct
  async for _ in model_response:
  pass

- return None, [response]
- else:
- assert isinstance(model_response, models.StreamStructuredResponse), f'Unexpected response: {model_response}'
+ text = ''.join(model_response.get(final=True))
+ return _messages.ModelResponse([_messages.TextPart(text)]), [response]
+ elif isinstance(model_response, models.StreamStructuredResponse):
  if self._result_schema is not None:
  # if there's a result schema, iterate over the stream until we find at least one tool
  # NOTE: this means we ignore any other tools called here
  structured_msg = model_response.get()
- while not structured_msg.calls:
+ while not structured_msg.parts:
  try:
  await model_response.__anext__()
  except StopAsyncIteration:
  break
  structured_msg = model_response.get()

- if match := self._result_schema.find_tool(structured_msg):
+ if match := self._result_schema.find_tool(structured_msg.parts):
  call, _ = match
- tool_return = _messages.ToolReturn(
- tool_name=call.tool_name,
- content='Final result processed.',
- tool_id=call.tool_id,
- )
- return _MarkFinalResult(model_response), [tool_return]
+ return _MarkFinalResult(model_response, call.tool_name)

  # the model is calling a tool function, consume the response to get the next message
  async for _ in model_response:
  pass
- structured_msg = model_response.get()
- if not structured_msg.calls:
+ model_response_msg = model_response.get()
+ if not model_response_msg.parts:
  raise exceptions.UnexpectedModelBehavior('Received empty tool call message')
- messages: list[_messages.Message] = [structured_msg]

  # we now run all tool functions in parallel
- tasks: list[asyncio.Task[_messages.Message]] = []
- for call in structured_msg.calls:
- if tool := self._function_tools.get(call.tool_name):
- tasks.append(asyncio.create_task(tool.run(deps, call), name=call.tool_name))
- else:
- messages.append(self._unknown_tool(call.tool_name))
+ tasks: list[asyncio.Task[_messages.ModelRequestPart]] = []
+ parts: list[_messages.ModelRequestPart] = []
+ for item in model_response_msg.parts:
+ if isinstance(item, _messages.ToolCallPart):
+ call = item
+ if tool := self._function_tools.get(call.tool_name):
+ tasks.append(asyncio.create_task(tool.run(call, run_context), name=call.tool_name))
+ else:
+ parts.append(self._unknown_tool(call.tool_name, run_context))

  with _logfire.span('running {tools=}', tools=[t.get_name() for t in tasks]):
- task_results: Sequence[_messages.Message] = await asyncio.gather(*tasks)
- messages.extend(task_results)
- return None, messages
+ task_results: Sequence[_messages.ModelRequestPart] = await asyncio.gather(*tasks)
+ parts.extend(task_results)
+ return model_response_msg, parts
+ else:
+ assert_never(model_response)

  async def _validate_result(
- self, result_data: ResultData, deps: AgentDeps, tool_call: _messages.ToolCall | None
+ self,
+ result_data: ResultData,
+ run_context: RunContext[AgentDeps],
+ tool_call: _messages.ToolCallPart | None,
  ) -> ResultData:
  for validator in self._result_validators:
- result_data = await validator.validate(result_data, deps, self._current_result_retry, tool_call)
+ result_data = await validator.validate(result_data, tool_call, run_context)
  return result_data

- def _incr_result_retry(self) -> None:
- self._current_result_retry += 1
- if self._current_result_retry > self._max_result_retries:
+ def _incr_result_retry(self, run_context: RunContext[AgentDeps]) -> None:
+ run_context.retry += 1
+ if run_context.retry > self._max_result_retries:
  raise exceptions.UnexpectedModelBehavior(
  f'Exceeded maximum retries ({self._max_result_retries}) for result validation'
  )

- async def _init_messages(self, deps: AgentDeps) -> list[_messages.Message]:
+ async def _sys_parts(self, run_context: RunContext[AgentDeps]) -> list[_messages.ModelRequestPart]:
  """Build the initial messages for the conversation."""
- messages: list[_messages.Message] = [_messages.SystemPrompt(p) for p in self._system_prompts]
+ messages: list[_messages.ModelRequestPart] = [_messages.SystemPromptPart(p) for p in self._system_prompts]
  for sys_prompt_runner in self._system_prompt_functions:
- prompt = await sys_prompt_runner.run(deps)
- messages.append(_messages.SystemPrompt(prompt))
+ prompt = await sys_prompt_runner.run(run_context)
+ messages.append(_messages.SystemPromptPart(prompt))
  return messages

- def _unknown_tool(self, tool_name: str) -> _messages.RetryPrompt:
- self._incr_result_retry()
+ def _unknown_tool(self, tool_name: str, run_context: RunContext[AgentDeps]) -> _messages.RetryPromptPart:
+ self._incr_result_retry(run_context)
  names = list(self._function_tools.keys())
  if self._result_schema:
  names.extend(self._result_schema.tool_names())
@@ -890,7 +1082,7 @@ class Agent(Generic[AgentDeps, ResultData]):
  msg = f'Available tools: {", ".join(names)}'
  else:
  msg = 'No tools available.'
- return _messages.RetryPrompt(content=f'Unknown tool name: {tool_name!r}. {msg}')
+ return _messages.RetryPromptPart(content=f'Unknown tool name: {tool_name!r}. {msg}')

  def _get_deps(self, deps: AgentDeps) -> AgentDeps:
  """Get deps for a run.
@@ -934,3 +1126,6 @@ class _MarkFinalResult(Generic[ResultData]):
  """

  data: ResultData
+ """The final result data."""
+ tool_name: str | None
+ """Name of the final result tool, None if the result is a string."""