pydantic-ai-slim 0.4.4__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of pydantic-ai-slim has been flagged as potentially problematic.

Files changed (98):
  1. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/.gitignore +0 -1
  2. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/PKG-INFO +4 -4
  3. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_a2a.py +3 -3
  4. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_function_schema.py +13 -4
  5. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_output.py +41 -25
  6. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/agent.py +18 -37
  7. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/format_prompt.py +3 -6
  8. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/__init__.py +1 -1
  9. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/function.py +15 -16
  10. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/gemini.py +0 -9
  11. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/instrumented.py +6 -1
  12. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/mistral.py +12 -2
  13. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/openai.py +13 -1
  14. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/result.py +115 -151
  15. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pyproject.toml +1 -1
  16. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/LICENSE +0 -0
  17. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/README.md +0 -0
  18. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/__init__.py +0 -0
  19. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/__main__.py +0 -0
  20. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_agent_graph.py +0 -0
  21. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_cli.py +0 -0
  22. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_griffe.py +0 -0
  23. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_mcp.py +0 -0
  24. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_parts_manager.py +0 -0
  25. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_run_context.py +0 -0
  26. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_system_prompt.py +0 -0
  27. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_thinking_part.py +0 -0
  28. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_tool_manager.py +0 -0
  29. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/_utils.py +0 -0
  30. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/ag_ui.py +0 -0
  31. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/common_tools/__init__.py +0 -0
  32. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/common_tools/duckduckgo.py +0 -0
  33. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/common_tools/tavily.py +0 -0
  34. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/direct.py +0 -0
  35. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/exceptions.py +0 -0
  36. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/ext/__init__.py +0 -0
  37. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/ext/aci.py +0 -0
  38. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/ext/langchain.py +0 -0
  39. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/format_as_xml.py +0 -0
  40. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/mcp.py +0 -0
  41. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/messages.py +0 -0
  42. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/anthropic.py +0 -0
  43. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/bedrock.py +0 -0
  44. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/cohere.py +0 -0
  45. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/fallback.py +0 -0
  46. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/google.py +0 -0
  47. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/groq.py +0 -0
  48. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/huggingface.py +0 -0
  49. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/mcp_sampling.py +0 -0
  50. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/test.py +0 -0
  51. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/models/wrapper.py +0 -0
  52. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/output.py +0 -0
  53. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/__init__.py +0 -0
  54. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/_json_schema.py +0 -0
  55. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/amazon.py +0 -0
  56. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/anthropic.py +0 -0
  57. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/cohere.py +0 -0
  58. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/deepseek.py +0 -0
  59. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/google.py +0 -0
  60. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/grok.py +0 -0
  61. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/meta.py +0 -0
  62. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/mistral.py +0 -0
  63. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/moonshotai.py +0 -0
  64. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/openai.py +0 -0
  65. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/profiles/qwen.py +0 -0
  66. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/__init__.py +0 -0
  67. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/anthropic.py +0 -0
  68. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/azure.py +0 -0
  69. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/bedrock.py +0 -0
  70. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/cohere.py +0 -0
  71. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/deepseek.py +0 -0
  72. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/fireworks.py +0 -0
  73. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/github.py +0 -0
  74. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/google.py +0 -0
  75. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/google_gla.py +0 -0
  76. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/google_vertex.py +0 -0
  77. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/grok.py +0 -0
  78. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/groq.py +0 -0
  79. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/heroku.py +0 -0
  80. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/huggingface.py +0 -0
  81. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/mistral.py +0 -0
  82. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/openai.py +0 -0
  83. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/openrouter.py +0 -0
  84. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/providers/together.py +0 -0
  85. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/py.typed +0 -0
  86. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/settings.py +0 -0
  87. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/tools.py +0 -0
  88. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/__init__.py +0 -0
  89. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/abstract.py +0 -0
  90. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/combined.py +0 -0
  91. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/deferred.py +0 -0
  92. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/filtered.py +0 -0
  93. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/function.py +0 -0
  94. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/prefixed.py +0 -0
  95. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/prepared.py +0 -0
  96. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/renamed.py +0 -0
  97. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/toolsets/wrapper.py +0 -0
  98. {pydantic_ai_slim-0.4.4 → pydantic_ai_slim-0.4.6}/pydantic_ai/usage.py +0 -0
--- pydantic_ai_slim-0.4.4/.gitignore
+++ pydantic_ai_slim-0.4.6/.gitignore
@@ -15,7 +15,6 @@ examples/pydantic_ai_examples/.chat_app_messages.sqlite
 .vscode/
 /question_graph_history.json
 /docs-site/.wrangler/
-/CLAUDE.md
 node_modules/
 **.idea/
 .coverage*
--- pydantic_ai_slim-0.4.4/PKG-INFO
+++ pydantic_ai_slim-0.4.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai-slim
-Version: 0.4.4
+Version: 0.4.6
 Summary: Agent Framework / shim to use Pydantic with LLMs, slim package
 Author-email: Samuel Colvin <samuel@pydantic.dev>, Marcelo Trylesinski <marcelotryle@gmail.com>, David Montague <david@pydantic.dev>, Alex Hall <alex@pydantic.dev>, Douwe Maan <douwe@pydantic.dev>
 License-Expression: MIT
@@ -30,7 +30,7 @@ Requires-Dist: exceptiongroup; python_version < '3.11'
 Requires-Dist: griffe>=1.3.2
 Requires-Dist: httpx>=0.27
 Requires-Dist: opentelemetry-api>=1.28.0
-Requires-Dist: pydantic-graph==0.4.4
+Requires-Dist: pydantic-graph==0.4.6
 Requires-Dist: pydantic>=2.10
 Requires-Dist: typing-inspection>=0.4.0
 Provides-Extra: a2a
@@ -51,7 +51,7 @@ Requires-Dist: cohere>=5.13.11; (platform_system != 'Emscripten') and extra == '
 Provides-Extra: duckduckgo
 Requires-Dist: ddgs>=9.0.0; extra == 'duckduckgo'
 Provides-Extra: evals
-Requires-Dist: pydantic-evals==0.4.4; extra == 'evals'
+Requires-Dist: pydantic-evals==0.4.6; extra == 'evals'
 Provides-Extra: google
 Requires-Dist: google-genai>=1.24.0; extra == 'google'
 Provides-Extra: groq
@@ -63,7 +63,7 @@ Requires-Dist: logfire>=3.11.0; extra == 'logfire'
 Provides-Extra: mcp
 Requires-Dist: mcp>=1.9.4; (python_version >= '3.10') and extra == 'mcp'
 Provides-Extra: mistral
-Requires-Dist: mistralai>=1.2.5; extra == 'mistral'
+Requires-Dist: mistralai>=1.9.2; extra == 'mistral'
 Provides-Extra: openai
 Requires-Dist: openai>=1.92.0; extra == 'openai'
 Provides-Extra: tavily
--- pydantic_ai_slim-0.4.4/pydantic_ai/_a2a.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/_a2a.py
@@ -59,12 +59,12 @@ except ImportError as _import_error:


 @asynccontextmanager
-async def worker_lifespan(app: FastA2A, worker: Worker) -> AsyncIterator[None]:
+async def worker_lifespan(app: FastA2A, worker: Worker, agent: Agent[AgentDepsT, OutputDataT]) -> AsyncIterator[None]:
     """Custom lifespan that runs the worker during application startup.

     This ensures the worker is started and ready to process tasks as soon as the application starts.
     """
-    async with app.task_manager:
+    async with app.task_manager, agent:
        async with worker.run():
            yield

@@ -93,7 +93,7 @@ def agent_to_a2a(
    broker = broker or InMemoryBroker()
    worker = AgentWorker(agent=agent, broker=broker, storage=storage)

-    lifespan = lifespan or partial(worker_lifespan, worker=worker)
+    lifespan = lifespan or partial(worker_lifespan, worker=worker, agent=agent)

    return FastA2A(
        storage=storage,
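
The change above makes the A2A app enter the agent itself as an async context manager at startup, so long-lived resources attached to the agent (e.g. MCP servers) are opened once for the app's lifetime. A minimal usage sketch; the `to_a2a` convenience method and model name are illustrative of the public API, not part of this diff:

    from pydantic_ai import Agent

    agent = Agent('openai:gpt-4o', instructions='Be helpful.')

    # to_a2a() calls agent_to_a2a() under the hood, which now wires the agent
    # into the worker lifespan shown above
    app = agent.to_a2a()
    # serve with any ASGI server, e.g. `uvicorn my_module:app`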
--- pydantic_ai_slim-0.4.4/pydantic_ai/_function_schema.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/_function_schema.py
@@ -96,8 +96,13 @@ def function_schema(  # noqa: C901
     config = ConfigDict(title=function.__name__, use_attribute_docstrings=True)
     config_wrapper = ConfigWrapper(config)
     gen_schema = _generate_schema.GenerateSchema(config_wrapper)
+    errors: list[str] = []

-    sig = signature(function)
+    try:
+        sig = signature(function)
+    except ValueError as e:
+        errors.append(str(e))
+        sig = signature(lambda: None)

     type_hints = _typing_extra.get_function_type_hints(function)

@@ -105,7 +110,6 @@ def function_schema(  # noqa: C901
     fields: dict[str, core_schema.TypedDictField] = {}
     positional_fields: list[str] = []
     var_positional_field: str | None = None
-    errors: list[str] = []
     decorators = _decorators.DecoratorInfos()

     description, field_descriptions = doc_descriptions(function, sig, docstring_format=docstring_format)
@@ -235,14 +239,19 @@ def _takes_ctx(function: TargetFunc[P, R]) -> TypeIs[WithCtx[P, R]]:
     Returns:
         `True` if the function takes a `RunContext` as first argument, `False` otherwise.
     """
-    sig = signature(function)
+    try:
+        sig = signature(function)
+    except ValueError:  # pragma: no cover
+        return False  # pragma: no cover
     try:
         first_param_name = next(iter(sig.parameters.keys()))
     except StopIteration:
         return False
     else:
         type_hints = _typing_extra.get_function_type_hints(function)
-        annotation = type_hints[first_param_name]
+        annotation = type_hints.get(first_param_name)
+        if annotation is None:
+            return False  # pragma: no cover
         return annotation is not sig.empty and _is_call_ctx(annotation)

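Context for the new try/except blocks: `inspect.signature` raises `ValueError` for callables that expose no signature metadata, and the fallback substitutes an empty signature while recording the error. A small standalone sketch of the pattern (the set of affected builtins varies by CPython version; `min` is one common example):

    import inspect

    def safe_signature(fn):
        try:
            return inspect.signature(fn)
        except ValueError:
            # e.g. C builtins with multiple call forms, such as `min` on CPython
            return inspect.signature(lambda: None)

    print(safe_signature(min))  # falls back to the empty () signature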
--- pydantic_ai_slim-0.4.4/pydantic_ai/_output.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/_output.py
@@ -69,12 +69,31 @@ DEFAULT_OUTPUT_TOOL_NAME = 'final_result'
 DEFAULT_OUTPUT_TOOL_DESCRIPTION = 'The final response which ends this conversation'


-async def execute_output_function_with_span(
+async def execute_traced_output_function(
     function_schema: _function_schema.FunctionSchema,
     run_context: RunContext[AgentDepsT],
     args: dict[str, Any] | Any,
+    wrap_validation_errors: bool = True,
 ) -> Any:
-    """Execute a function call within a traced span, automatically recording the response."""
+    """Execute an output function within a traced span with error handling.
+
+    This function executes the output function within an OpenTelemetry span for observability,
+    automatically records the function response, and handles ModelRetry exceptions by converting
+    them to ToolRetryError when wrap_validation_errors is True.
+
+    Args:
+        function_schema: The function schema containing the function to execute
+        run_context: The current run context containing tracing and tool information
+        args: Arguments to pass to the function
+        wrap_validation_errors: If True, wrap ModelRetry exceptions in ToolRetryError
+
+    Returns:
+        The result of the function execution
+
+    Raises:
+        ToolRetryError: When wrap_validation_errors is True and a ModelRetry is caught
+        ModelRetry: When wrap_validation_errors is False and a ModelRetry occurs
+    """
     # Set up span attributes
     tool_name = run_context.tool_name or getattr(function_schema.function, '__name__', 'output_function')
     attributes = {
@@ -96,7 +115,19 @@ async def execute_output_function_with_span(
     )

     with run_context.tracer.start_as_current_span('running output function', attributes=attributes) as span:
-        output = await function_schema.call(args, run_context)
+        try:
+            output = await function_schema.call(args, run_context)
+        except ModelRetry as r:
+            if wrap_validation_errors:
+                m = _messages.RetryPromptPart(
+                    content=r.message,
+                    tool_name=run_context.tool_name,
+                )
+                if run_context.tool_call_id:
+                    m.tool_call_id = run_context.tool_call_id  # pragma: no cover
+                raise ToolRetryError(m) from r
+            else:
+                raise

         # Record response if content inclusion is enabled
         if run_context.trace_include_content and span.is_recording():
@@ -663,16 +694,7 @@ class ObjectOutputProcessor(BaseOutputProcessor[OutputDataT]):
             else:
                 raise

-        try:
-            output = await self.call(output, run_context)
-        except ModelRetry as r:
-            if wrap_validation_errors:
-                m = _messages.RetryPromptPart(
-                    content=r.message,
-                )
-                raise ToolRetryError(m) from r
-            else:
-                raise  # pragma: no cover
+        output = await self.call(output, run_context, wrap_validation_errors)

         return output

@@ -691,12 +713,15 @@ class ObjectOutputProcessor(BaseOutputProcessor[OutputDataT]):
         self,
         output: Any,
         run_context: RunContext[AgentDepsT],
+        wrap_validation_errors: bool = True,
     ):
         if k := self.outer_typed_dict_key:
             output = output[k]

         if self._function_schema:
-            output = await execute_output_function_with_span(self._function_schema, run_context, output)
+            output = await execute_traced_output_function(
+                self._function_schema, run_context, output, wrap_validation_errors
+            )

         return output

@@ -856,16 +881,7 @@ class PlainTextOutputProcessor(BaseOutputProcessor[OutputDataT]):
         wrap_validation_errors: bool = True,
     ) -> OutputDataT:
         args = {self._str_argument_name: data}
-        try:
-            output = await execute_output_function_with_span(self._function_schema, run_context, args)
-        except ModelRetry as r:
-            if wrap_validation_errors:
-                m = _messages.RetryPromptPart(
-                    content=r.message,
-                )
-                raise ToolRetryError(m) from r
-            else:
-                raise  # pragma: no cover
+        output = await execute_traced_output_function(self._function_schema, run_context, args, wrap_validation_errors)

         return cast(OutputDataT, output)

@@ -975,7 +991,7 @@ class OutputToolset(AbstractToolset[AgentDepsT]):
     async def call_tool(
         self, name: str, tool_args: dict[str, Any], ctx: RunContext[AgentDepsT], tool: ToolsetTool[AgentDepsT]
     ) -> Any:
-        output = await self.processors[name].call(tool_args, ctx)
+        output = await self.processors[name].call(tool_args, ctx, wrap_validation_errors=False)
         for validator in self.output_validators:
             output = await validator.validate(output, ctx, wrap_validation_errors=False)
         return output
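
With retry handling centralized in `execute_traced_output_function`, any output function can raise `ModelRetry` and have it converted into a retry prompt for the model (unless `wrap_validation_errors=False`, as in `OutputToolset.call_tool` above). A hedged sketch of the user-facing pattern; the validation rule is invented for illustration:

    from pydantic_ai import Agent, ModelRetry

    def check_answer(answer: int) -> int:
        # raising ModelRetry here is caught inside the traced span and
        # re-raised as a ToolRetryError, prompting the model to try again
        if answer < 0:
            raise ModelRetry('The answer must be non-negative, please try again.')
        return answer

    agent = Agent('openai:gpt-4o', output_type=check_answer)  # model name illustrative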
--- pydantic_ai_slim-0.4.4/pydantic_ai/agent.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/agent.py
@@ -36,7 +36,7 @@ from ._tool_manager import ToolManager
 from .models.instrumented import InstrumentationSettings, InstrumentedModel, instrument_model
 from .output import OutputDataT, OutputSpec
 from .profiles import ModelProfile
-from .result import FinalResult, StreamedRunResult
+from .result import AgentStream, FinalResult, StreamedRunResult
 from .settings import ModelSettings, merge_model_settings
 from .tools import (
     AgentDepsT,
@@ -843,14 +843,15 @@ class Agent(Generic[AgentDepsT, OutputDataT]):
             agent_run = AgentRun(graph_run)
             yield agent_run
             if (final_result := agent_run.result) is not None and run_span.is_recording():
-                run_span.set_attribute(
-                    'final_result',
-                    (
-                        final_result.output
-                        if isinstance(final_result.output, str)
-                        else json.dumps(InstrumentedModel.serialize_any(final_result.output))
-                    ),
-                )
+                if instrumentation_settings and instrumentation_settings.include_content:
+                    run_span.set_attribute(
+                        'final_result',
+                        (
+                            final_result.output
+                            if isinstance(final_result.output, str)
+                            else json.dumps(InstrumentedModel.serialize_any(final_result.output))
+                        ),
+                    )
         finally:
             try:
                 if instrumentation_settings and run_span.is_recording():
@@ -1126,29 +1127,15 @@ class Agent(Generic[AgentDepsT, OutputDataT]):
         while True:
             if self.is_model_request_node(node):
                 graph_ctx = agent_run.ctx
-                async with node._stream(graph_ctx) as streamed_response:  # pyright: ignore[reportPrivateUsage]
-
-                    async def stream_to_final(
-                        s: models.StreamedResponse,
-                    ) -> FinalResult[models.StreamedResponse] | None:
-                        output_schema = graph_ctx.deps.output_schema
-                        async for maybe_part_event in streamed_response:
-                            if isinstance(maybe_part_event, _messages.PartStartEvent):
-                                new_part = maybe_part_event.part
-                                if isinstance(new_part, _messages.TextPart) and isinstance(
-                                    output_schema, _output.TextOutputSchema
-                                ):
-                                    return FinalResult(s, None, None)
-                                elif isinstance(new_part, _messages.ToolCallPart) and (
-                                    tool_def := graph_ctx.deps.tool_manager.get_tool_def(new_part.tool_name)
-                                ):
-                                    if tool_def.kind == 'output':
-                                        return FinalResult(s, new_part.tool_name, new_part.tool_call_id)
-                                    elif tool_def.kind == 'deferred':
-                                        return FinalResult(s, None, None)
+                async with node.stream(graph_ctx) as stream:
+
+                    async def stream_to_final(s: AgentStream) -> FinalResult[AgentStream] | None:
+                        async for event in stream:
+                            if isinstance(event, _messages.FinalResultEvent):
+                                return FinalResult(s, event.tool_name, event.tool_call_id)
                         return None

-                    final_result = await stream_to_final(streamed_response)
+                    final_result = await stream_to_final(stream)
                    if final_result is not None:
                        if yielded:
                            raise exceptions.AgentRunError('Agent run produced final results')  # pragma: no cover
@@ -1183,14 +1170,8 @@ class Agent(Generic[AgentDepsT, OutputDataT]):
                        yield StreamedRunResult(
                            messages,
                            graph_ctx.deps.new_message_index,
-                            graph_ctx.deps.usage_limits,
-                            streamed_response,
-                            graph_ctx.deps.output_schema,
-                            _agent_graph.build_run_context(graph_ctx),
-                            graph_ctx.deps.output_validators,
-                            final_result.tool_name,
+                            stream,
                            on_complete,
-                            graph_ctx.deps.tool_manager,
                        )
                        break
                next_node = await agent_run.next(node)
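
The rewrite routes `run_stream` through the public `node.stream()` / `AgentStream` machinery and relies on `FinalResultEvent` instead of re-implementing final-output detection inline, which is why the `StreamedRunResult` constructor shrinks to three arguments. The public streaming surface is unchanged, e.g.:

    from pydantic_ai import Agent

    agent = Agent('openai:gpt-4o')  # model name illustrative

    async def main():
        async with agent.run_stream('Tell me a joke.') as result:
            # yields progressively longer validated text snapshots
            async for text in result.stream_text():
                print(text)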
--- pydantic_ai_slim-0.4.4/pydantic_ai/format_prompt.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/format_prompt.py
@@ -13,9 +13,8 @@ __all__ = ('format_as_xml',)

 def format_as_xml(
     obj: Any,
-    root_tag: str = 'examples',
-    item_tag: str = 'example',
-    include_root_tag: bool = True,
+    root_tag: str | None = None,
+    item_tag: str = 'item',
     none_str: str = 'null',
     indent: str | None = '  ',
 ) -> str:
@@ -32,8 +31,6 @@ def format_as_xml(
         root_tag: Outer tag to wrap the XML in, use `None` to omit the outer tag.
         item_tag: Tag to use for each item in an iterable (e.g. list), this is overridden by the class name
             for dataclasses and Pydantic models.
-        include_root_tag: Whether to include the root tag in the output
-            (The root tag is always included if it includes a body - e.g. when the input is a simple value).
         none_str: String to use for `None` values.
         indent: Indentation string to use for pretty printing.

@@ -55,7 +52,7 @@ def format_as_xml(
     ```
     """
     el = _ToXml(item_tag=item_tag, none_str=none_str).to_xml(obj, root_tag)
-    if not include_root_tag and el.text is None:
+    if root_tag is None and el.text is None:
         join = '' if indent is None else '\n'
         return join.join(_rootless_xml_elements(el, indent))
     else:
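
Behaviour change to note: the defaults move from `root_tag='examples'` / `item_tag='example'` to no root tag and `item_tag='item'`, with `include_root_tag` removed in favour of `root_tag=None`. Expected output under the new defaults, as I read the code above (not taken from the package docs):

    from pydantic_ai.format_prompt import format_as_xml

    print(format_as_xml(['apple', 'banana']))
    # <item>apple</item>
    # <item>banana</item>

    print(format_as_xml(['apple', 'banana'], root_tag='examples', item_tag='example'))
    # <examples>
    #   <example>apple</example>
    #   <example>banana</example>
    # </examples>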
--- pydantic_ai_slim-0.4.4/pydantic_ai/models/__init__.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/models/__init__.py
@@ -758,7 +758,7 @@ async def download_item(

     data_type = media_type
     if type_format == 'extension':
-        data_type = data_type.split('/')[1]
+        data_type = item.format

     data = response.content
     if data_format in ('base64', 'base64_uri'):
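
The fix here: deriving the "extension" by splitting the media type on '/' gives the raw subtype, whereas the content item's `format` property maps the media type to a conventional file extension. An illustration (the 'audio/mpeg' → 'mp3' mapping reflects the media-type tables in pydantic_ai.messages):

    media_type = 'audio/mpeg'
    naive = media_type.split('/')[1]  # 'mpeg', not a usable file extension
    # vs. the format property on content items such as AudioUrl:
    # AudioUrl(url='https://example.com/a.mp3').format  -> 'mp3'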
--- pydantic_ai_slim-0.4.4/pydantic_ai/models/function.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/models/function.py
@@ -16,9 +16,7 @@ from pydantic_ai.profiles import ModelProfileSpec
 from .. import _utils, usage
 from .._utils import PeekableAsyncStream
 from ..messages import (
-    AudioUrl,
     BinaryContent,
-    ImageUrl,
     ModelMessage,
     ModelRequest,
     ModelResponse,
@@ -345,18 +343,19 @@ def _estimate_usage(messages: Iterable[ModelMessage]) -> usage.Usage:
 def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
     if not content:
         return 0
+
     if isinstance(content, str):
-        return len(re.split(r'[\s",.:]+', content.strip()))
-    else:
-        tokens = 0
-        for part in content:
-            if isinstance(part, str):
-                tokens += len(re.split(r'[\s",.:]+', part.strip()))
-            # TODO(Marcelo): We need to study how we can estimate the tokens for these types of content.
-            if isinstance(part, (AudioUrl, ImageUrl)):
-                tokens += 0
-            elif isinstance(part, BinaryContent):
-                tokens += len(part.data)
-            else:
-                tokens += 0
-        return tokens
+        return len(_TOKEN_SPLIT_RE.split(content.strip()))
+
+    tokens = 0
+    for part in content:
+        if isinstance(part, str):
+            tokens += len(_TOKEN_SPLIT_RE.split(part.strip()))
+        elif isinstance(part, BinaryContent):
+            tokens += len(part.data)
+        # TODO(Marcelo): We need to study how we can estimate the tokens for AudioUrl or ImageUrl.
+
+    return tokens
+
+
+_TOKEN_SPLIT_RE = re.compile(r'[\s",.:]+')
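
The estimator's behaviour for strings is unchanged (a rough whitespace/punctuation word count, now via a precompiled module-level regex); the restructuring also collapses the old second isinstance chain, which only ever added zero for URL parts. A quick check of the string path:

    import re

    _TOKEN_SPLIT_RE = re.compile(r'[\s",.:]+')
    # splits on runs of whitespace, quotes, commas, periods and colons
    print(len(_TOKEN_SPLIT_RE.split('The quick brown fox'.strip())))  # 4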
--- pydantic_ai_slim-0.4.4/pydantic_ai/models/gemini.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/models/gemini.py
@@ -91,15 +91,6 @@ class GeminiModelSettings(ModelSettings, total=False):
     See the [Gemini API docs](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls) for use cases and limitations.
     """

-    gemini_thinking_config: ThinkingConfig
-    """Thinking is on by default in both the API and AI Studio.
-
-    Being on by default doesn't mean the model will send back thoughts. For that, you need to set `include_thoughts`
-    to `True`. If you want to turn it off, set `thinking_budget` to `0`.
-
-    See more about it on <https://ai.google.dev/gemini-api/docs/thinking>.
-    """
-

 @dataclass(init=False)
 class GeminiModel(Model):
--- pydantic_ai_slim-0.4.4/pydantic_ai/models/instrumented.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/models/instrumented.py
@@ -156,7 +156,12 @@ class InstrumentationSettings:
         events: list[Event] = []
         instructions = InstrumentedModel._get_instructions(messages)  # pyright: ignore [reportPrivateUsage]
         if instructions is not None:
-            events.append(Event('gen_ai.system.message', body={'content': instructions, 'role': 'system'}))
+            events.append(
+                Event(
+                    'gen_ai.system.message',
+                    body={**({'content': instructions} if self.include_content else {}), 'role': 'system'},
+                )
+            )

         for message_index, message in enumerate(messages):
             message_events: list[Event] = []
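
This aligns the system-message event with the `include_content` setting already honoured elsewhere: when content capture is off, the event keeps its role but drops the instruction text. A sketch of the knob (passing settings via the `instrument=` parameter is the agent's documented instrumentation hook; model name illustrative):

    from pydantic_ai import Agent
    from pydantic_ai.models.instrumented import InstrumentationSettings

    # spans/events keep structure (event names, roles) but omit message bodies
    agent = Agent('openai:gpt-4o', instrument=InstrumentationSettings(include_content=False))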
--- pydantic_ai_slim-0.4.4/pydantic_ai/models/mistral.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/models/mistral.py
@@ -52,6 +52,7 @@ try:
     CompletionChunk as MistralCompletionChunk,
     Content as MistralContent,
     ContentChunk as MistralContentChunk,
+    DocumentURLChunk as MistralDocumentURLChunk,
     FunctionCall as MistralFunctionCall,
     ImageURL as MistralImageURL,
     ImageURLChunk as MistralImageURLChunk,
@@ -539,10 +540,19 @@ class MistralModel(Model):
                 if item.is_image:
                     image_url = MistralImageURL(url=f'data:{item.media_type};base64,{base64_encoded}')
                     content.append(MistralImageURLChunk(image_url=image_url, type='image_url'))
+                elif item.media_type == 'application/pdf':
+                    content.append(
+                        MistralDocumentURLChunk(
+                            document_url=f'data:application/pdf;base64,{base64_encoded}', type='document_url'
+                        )
+                    )
                 else:
-                    raise RuntimeError('Only image binary content is supported for Mistral.')
+                    raise RuntimeError('BinaryContent other than image or PDF is not supported in Mistral.')
             elif isinstance(item, DocumentUrl):
-                raise RuntimeError('DocumentUrl is not supported in Mistral.')  # pragma: no cover
+                if item.media_type == 'application/pdf':
+                    content.append(MistralDocumentURLChunk(document_url=item.url, type='document_url'))
+                else:
+                    raise RuntimeError('DocumentUrl other than PDF is not supported in Mistral.')
             elif isinstance(item, VideoUrl):
                 raise RuntimeError('VideoUrl is not supported in Mistral.')
             else:  # pragma: no cover
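
PDF input now works for Mistral both as inline binary content (sent as a base64 data URL) and as a `DocumentUrl`. A hedged usage sketch; the model name and URL are placeholders:

    from pydantic_ai import Agent
    from pydantic_ai.messages import DocumentUrl

    agent = Agent('mistral:mistral-small-latest')
    result = agent.run_sync(
        [
            'Summarize this document.',
            DocumentUrl(url='https://example.com/report.pdf'),  # must resolve to application/pdf
        ]
    )
    print(result.output)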
--- pydantic_ai_slim-0.4.4/pydantic_ai/models/openai.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/models/openai.py
@@ -8,6 +8,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Any, Literal, Union, cast, overload

+from pydantic import ValidationError
 from typing_extensions import assert_never

 from pydantic_ai._thinking_part import split_content_into_text_and_thinking
@@ -347,8 +348,19 @@ class OpenAIModel(Model):
             raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
         raise  # pragma: no cover

-    def _process_response(self, response: chat.ChatCompletion) -> ModelResponse:
+    def _process_response(self, response: chat.ChatCompletion | str) -> ModelResponse:
         """Process a non-streamed response, and prepare a message to return."""
+        # Although the OpenAI SDK claims to return a Pydantic model (`ChatCompletion`) from the chat completions function:
+        # * it hasn't actually performed validation (presumably they're creating the model with `model_construct` or something?!)
+        # * if the endpoint returns plain text, the return type is a string
+        # Thus we validate it fully here.
+        if not isinstance(response, chat.ChatCompletion):
+            raise UnexpectedModelBehavior('Invalid response from OpenAI chat completions endpoint, expected JSON data')
+
+        try:
+            response = chat.ChatCompletion.model_validate(response.model_dump())
+        except ValidationError as e:
+            raise UnexpectedModelBehavior(f'Invalid response from OpenAI chat completions endpoint: {e}') from e
         timestamp = number_to_datetime(response.created)
         choice = response.choices[0]
         items: list[ModelResponsePart] = []
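
The comment in the diff explains the motivation; the general pattern is that a pydantic model built via `model_construct` skips validation, so round-tripping through `model_validate` is what surfaces malformed payloads. A standalone illustration using generic pydantic, not the OpenAI types:

    from pydantic import BaseModel, ValidationError

    class Completion(BaseModel):
        created: int

    # model_construct() performs no validation, mirroring what the SDK appears to do
    unchecked = Completion.model_construct(created='not-a-timestamp')

    try:
        Completion.model_validate(unchecked.model_dump())
    except ValidationError as e:
        print(f'invalid payload: {e.error_count()} error(s)')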
--- pydantic_ai_slim-0.4.4/pydantic_ai/result.py
+++ pydantic_ai_slim-0.4.6/pydantic_ai/result.py
@@ -63,22 +63,18 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
         async for response in self.stream_responses(debounce_by=debounce_by):
             if self._final_result_event is not None:
                 try:
-                    yield await self._validate_response(
-                        response, self._final_result_event.tool_name, allow_partial=True
-                    )
+                    yield await self._validate_response(response, allow_partial=True)
                 except ValidationError:
                     pass
         if self._final_result_event is not None:  # pragma: no branch
-            yield await self._validate_response(
-                self._raw_stream_response.get(), self._final_result_event.tool_name, allow_partial=False
-            )
+            yield await self._validate_response(self._raw_stream_response.get(), allow_partial=False)

     async def stream_responses(self, *, debounce_by: float | None = 0.1) -> AsyncIterator[_messages.ModelResponse]:
         """Asynchronously stream the (unvalidated) model responses for the agent."""
         # if the message currently has any parts with content, yield before streaming
         msg = self._raw_stream_response.get()
         for part in msg.parts:
-            if part.has_content():  # pragma: no cover
+            if part.has_content():
                 yield msg
                 break

@@ -86,6 +82,35 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
             async for _items in group_iter:
                 yield self._raw_stream_response.get()  # current state of the response

+    async def stream_text(self, *, delta: bool = False, debounce_by: float | None = 0.1) -> AsyncIterator[str]:
+        """Stream the text result as an async iterable.
+
+        !!! note
+            Result validators will NOT be called on the text result if `delta=True`.
+
+        Args:
+            delta: if `True`, yield each chunk of text as it is received, if `False` (default), yield the full text
+                up to the current point.
+            debounce_by: by how much (if at all) to debounce/group the response chunks by. `None` means no debouncing.
+                Debouncing is particularly important for long structured responses to reduce the overhead of
+                performing validation as each token is received.
+        """
+        if not isinstance(self._output_schema, PlainTextOutputSchema):
+            raise exceptions.UserError('stream_text() can only be used with text responses')
+
+        if delta:
+            async for text in self._stream_response_text(delta=True, debounce_by=debounce_by):
+                yield text
+        else:
+            async for text in self._stream_response_text(delta=False, debounce_by=debounce_by):
+                for validator in self._output_validators:
+                    text = await validator.validate(text, self._run_ctx)  # pragma: no cover
+                yield text
+
+    def get(self) -> _messages.ModelResponse:
+        """Get the current state of the response."""
+        return self._raw_stream_response.get()
+
     def usage(self) -> Usage:
         """Return the usage of the whole run.

@@ -94,10 +119,24 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
         """
         return self._initial_run_ctx_usage + self._raw_stream_response.usage()

-    async def _validate_response(
-        self, message: _messages.ModelResponse, output_tool_name: str | None, *, allow_partial: bool = False
-    ) -> OutputDataT:
+    def timestamp(self) -> datetime:
+        """Get the timestamp of the response."""
+        return self._raw_stream_response.timestamp
+
+    async def get_output(self) -> OutputDataT:
+        """Stream the whole response, validate the output and return it."""
+        async for _ in self:
+            pass
+
+        return await self._validate_response(self._raw_stream_response.get(), allow_partial=False)
+
+    async def _validate_response(self, message: _messages.ModelResponse, *, allow_partial: bool = False) -> OutputDataT:
         """Validate a structured result message."""
+        if self._final_result_event is None:
+            raise exceptions.UnexpectedModelBehavior('Invalid response, unable to find output')  # pragma: no cover
+
+        output_tool_name = self._final_result_event.tool_name
+
         if isinstance(self._output_schema, ToolOutputSchema) and output_tool_name is not None:
             tool_call = next(
                 (
@@ -114,7 +153,7 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
             return await self._tool_manager.handle_call(tool_call, allow_partial=allow_partial)
         elif deferred_tool_calls := self._tool_manager.get_deferred_tool_calls(message.parts):
             if not self._output_schema.allows_deferred_tool_calls:
-                raise exceptions.UserError(  # pragma: no cover
+                raise exceptions.UserError(
                     'A deferred tool call was present, but `DeferredToolCalls` is not among output types. To resolve this, add `DeferredToolCalls` to the list of output types for this agent.'
                 )
             return cast(OutputDataT, deferred_tool_calls)
@@ -132,6 +171,54 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
                 'Invalid response, unable to process text output'
             )

+    async def _stream_response_text(
+        self, *, delta: bool = False, debounce_by: float | None = 0.1
+    ) -> AsyncIterator[str]:
+        """Stream the response as an async iterable of text."""
+
+        # Define a "merged" version of the iterator that will yield items that have already been retrieved
+        # and items that we receive while streaming. We define a dedicated async iterator for this so we can
+        # pass the combined stream to the group_by_temporal function within `_stream_text_deltas` below.
+        async def _stream_text_deltas_ungrouped() -> AsyncIterator[tuple[str, int]]:
+            # yields tuples of (text_content, part_index)
+            # we don't currently make use of the part_index, but in principle this may be useful
+            # so we retain it here for now to make possible future refactors simpler
+            msg = self._raw_stream_response.get()
+            for i, part in enumerate(msg.parts):
+                if isinstance(part, _messages.TextPart) and part.content:
+                    yield part.content, i
+
+            async for event in self._raw_stream_response:
+                if (
+                    isinstance(event, _messages.PartStartEvent)
+                    and isinstance(event.part, _messages.TextPart)
+                    and event.part.content
+                ):
+                    yield event.part.content, event.index  # pragma: no cover
+                elif (  # pragma: no branch
+                    isinstance(event, _messages.PartDeltaEvent)
+                    and isinstance(event.delta, _messages.TextPartDelta)
+                    and event.delta.content_delta
+                ):
+                    yield event.delta.content_delta, event.index
+
+        async def _stream_text_deltas() -> AsyncIterator[str]:
+            async with _utils.group_by_temporal(_stream_text_deltas_ungrouped(), debounce_by) as group_iter:
+                async for items in group_iter:
+                    # Note: we are currently just dropping the part index on the group here
+                    yield ''.join([content for content, _ in items])
+
+        if delta:
+            async for text in _stream_text_deltas():
+                yield text
+        else:
+            # a quick benchmark shows it's faster to build up a string with concat when we're
+            # yielding at each step
+            deltas: list[str] = []
+            async for text in _stream_text_deltas():
+                deltas.append(text)
+                yield ''.join(deltas)
+
     def __aiter__(self) -> AsyncIterator[AgentStreamEvent]:
         """Stream [`AgentStreamEvent`][pydantic_ai.messages.AgentStreamEvent]s.

@@ -189,16 +276,9 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
     _all_messages: list[_messages.ModelMessage]
     _new_message_index: int

-    _usage_limits: UsageLimits | None
-    _stream_response: models.StreamedResponse
-    _output_schema: OutputSchema[OutputDataT]
-    _run_ctx: RunContext[AgentDepsT]
-    _output_validators: list[OutputValidator[AgentDepsT, OutputDataT]]
-    _output_tool_name: str | None
+    _stream_response: AgentStream[AgentDepsT, OutputDataT]
     _on_complete: Callable[[], Awaitable[None]]
-    _tool_manager: ToolManager[AgentDepsT]

-    _initial_run_ctx_usage: Usage = field(init=False)
     is_complete: bool = field(default=False, init=False)
     """Whether the stream has all been received.

@@ -209,9 +289,6 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
     [`get_output`][pydantic_ai.result.StreamedRunResult.get_output] completes.
     """

-    def __post_init__(self):
-        self._initial_run_ctx_usage = copy(self._run_ctx.usage)
-
     @overload
     def all_messages(self, *, output_tool_return_content: str | None = None) -> list[_messages.ModelMessage]: ...

@@ -332,12 +409,9 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
         Returns:
             An async iterable of the response data.
         """
-        async for structured_message, is_last in self.stream_structured(debounce_by=debounce_by):
-            try:
-                yield await self.validate_structured_output(structured_message, allow_partial=not is_last)
-            except ValidationError:
-                if is_last:
-                    raise  # pragma: no cover
+        async for output in self._stream_response.stream_output(debounce_by=debounce_by):
+            yield output
+        await self._marked_completed(self._stream_response.get())

     async def stream_text(self, *, delta: bool = False, debounce_by: float | None = 0.1) -> AsyncIterator[str]:
         """Stream the text result as an async iterable.
@@ -352,16 +426,8 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
             Debouncing is particularly important for long structured responses to reduce the overhead of
             performing validation as each token is received.
         """
-        if not isinstance(self._output_schema, PlainTextOutputSchema):
-            raise exceptions.UserError('stream_text() can only be used with text responses')
-
-        if delta:
-            async for text in self._stream_response_text(delta=delta, debounce_by=debounce_by):
-                yield text
-        else:
-            async for text in self._stream_response_text(delta=delta, debounce_by=debounce_by):
-                combined_validated_text = await self._validate_text_output(text)
-                yield combined_validated_text
+        async for text in self._stream_response.stream_text(delta=delta, debounce_by=debounce_by):
+            yield text
         await self._marked_completed(self._stream_response.get())

     async def stream_structured(
@@ -378,13 +444,7 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
             An async iterable of the structured response message and whether that is the last message.
         """
         # if the message currently has any parts with content, yield before streaming
-        msg = self._stream_response.get()
-        for part in msg.parts:
-            if part.has_content():
-                yield msg, False
-                break
-
-        async for msg in self._stream_response_structured(debounce_by=debounce_by):
+        async for msg in self._stream_response.stream_responses(debounce_by=debounce_by):
             yield msg, False

         msg = self._stream_response.get()
@@ -394,15 +454,9 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):

     async def get_output(self) -> OutputDataT:
         """Stream the whole response, validate and return it."""
-        usage_checking_stream = _get_usage_checking_stream_response(
-            self._stream_response, self._usage_limits, self.usage
-        )
-
-        async for _ in usage_checking_stream:
-            pass
-        message = self._stream_response.get()
-        await self._marked_completed(message)
-        return await self.validate_structured_output(message)
+        output = await self._stream_response.get_output()
+        await self._marked_completed(self._stream_response.get())
+        return output

     @deprecated('`get_data` is deprecated, use `get_output` instead.')
     async def get_data(self) -> OutputDataT:
@@ -414,11 +468,11 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
         !!! note
             This won't return the full usage until the stream is finished.
         """
-        return self._initial_run_ctx_usage + self._stream_response.usage()
+        return self._stream_response.usage()

     def timestamp(self) -> datetime:
         """Get the timestamp of the response."""
-        return self._stream_response.timestamp
+        return self._stream_response.timestamp()

     @deprecated('`validate_structured_result` is deprecated, use `validate_structured_output` instead.')
     async def validate_structured_result(
@@ -430,105 +484,15 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
         self, message: _messages.ModelResponse, *, allow_partial: bool = False
     ) -> OutputDataT:
         """Validate a structured result message."""
-        if isinstance(self._output_schema, ToolOutputSchema) and self._output_tool_name is not None:
-            tool_call = next(
-                (
-                    part
-                    for part in message.parts
-                    if isinstance(part, _messages.ToolCallPart) and part.tool_name == self._output_tool_name
-                ),
-                None,
-            )
-            if tool_call is None:
-                raise exceptions.UnexpectedModelBehavior(  # pragma: no cover
-                    f'Invalid response, unable to find tool call for {self._output_tool_name!r}'
-                )
-            return await self._tool_manager.handle_call(tool_call, allow_partial=allow_partial)
-        elif deferred_tool_calls := self._tool_manager.get_deferred_tool_calls(message.parts):
-            if not self._output_schema.allows_deferred_tool_calls:
-                raise exceptions.UserError(
-                    'A deferred tool call was present, but `DeferredToolCalls` is not among output types. To resolve this, add `DeferredToolCalls` to the list of output types for this agent.'
-                )
-            return cast(OutputDataT, deferred_tool_calls)
-        elif isinstance(self._output_schema, TextOutputSchema):
-            text = '\n\n'.join(x.content for x in message.parts if isinstance(x, _messages.TextPart))
-
-            result_data = await self._output_schema.process(
-                text, self._run_ctx, allow_partial=allow_partial, wrap_validation_errors=False
-            )
-            for validator in self._output_validators:
-                result_data = await validator.validate(result_data, self._run_ctx)  # pragma: no cover
-            return result_data
-        else:
-            raise exceptions.UnexpectedModelBehavior(  # pragma: no cover
-                'Invalid response, unable to process text output'
-            )
-
-    async def _validate_text_output(self, text: str) -> str:
-        for validator in self._output_validators:
-            text = await validator.validate(text, self._run_ctx)  # pragma: no cover
-        return text
+        return await self._stream_response._validate_response(  # pyright: ignore[reportPrivateUsage]
+            message, allow_partial=allow_partial
+        )

     async def _marked_completed(self, message: _messages.ModelResponse) -> None:
         self.is_complete = True
         self._all_messages.append(message)
         await self._on_complete()

-    async def _stream_response_structured(
-        self, *, debounce_by: float | None = 0.1
-    ) -> AsyncIterator[_messages.ModelResponse]:
-        async with _utils.group_by_temporal(self._stream_response, debounce_by) as group_iter:
-            async for _items in group_iter:
-                yield self._stream_response.get()
-
-    async def _stream_response_text(
-        self, *, delta: bool = False, debounce_by: float | None = 0.1
-    ) -> AsyncIterator[str]:
-        """Stream the response as an async iterable of text."""
-
-        # Define a "merged" version of the iterator that will yield items that have already been retrieved
-        # and items that we receive while streaming. We define a dedicated async iterator for this so we can
-        # pass the combined stream to the group_by_temporal function within `_stream_text_deltas` below.
-        async def _stream_text_deltas_ungrouped() -> AsyncIterator[tuple[str, int]]:
-            # yields tuples of (text_content, part_index)
-            # we don't currently make use of the part_index, but in principle this may be useful
-            # so we retain it here for now to make possible future refactors simpler
-            msg = self._stream_response.get()
-            for i, part in enumerate(msg.parts):
-                if isinstance(part, _messages.TextPart) and part.content:
-                    yield part.content, i
-
-            async for event in self._stream_response:
-                if (
-                    isinstance(event, _messages.PartStartEvent)
-                    and isinstance(event.part, _messages.TextPart)
-                    and event.part.content
-                ):
-                    yield event.part.content, event.index  # pragma: no cover
-                elif (  # pragma: no branch
-                    isinstance(event, _messages.PartDeltaEvent)
-                    and isinstance(event.delta, _messages.TextPartDelta)
-                    and event.delta.content_delta
-                ):
-                    yield event.delta.content_delta, event.index
-
-        async def _stream_text_deltas() -> AsyncIterator[str]:
-            async with _utils.group_by_temporal(_stream_text_deltas_ungrouped(), debounce_by) as group_iter:
-                async for items in group_iter:
-                    # Note: we are currently just dropping the part index on the group here
-                    yield ''.join([content for content, _ in items])
-
-        if delta:
-            async for text in _stream_text_deltas():
-                yield text
-        else:
-            # a quick benchmark shows it's faster to build up a string with concat when we're
-            # yielding at each step
-            deltas: list[str] = []
-            async for text in _stream_text_deltas():
-                deltas.append(text)
-                yield ''.join(deltas)
-

 @dataclass(repr=False)
 class FinalResult(Generic[OutputDataT]):
@@ -556,12 +520,12 @@ def _get_usage_checking_stream_response(
 ) -> AsyncIterable[_messages.ModelResponseStreamEvent]:
     if limits is not None and limits.has_token_limits():

-        async def _usage_checking_iterator():  # pragma: no cover
+        async def _usage_checking_iterator():
             async for item in stream_response:
                 limits.check_tokens(get_usage())
                 yield item

-        return _usage_checking_iterator()  # pragma: no cover
+        return _usage_checking_iterator()
     else:
         return stream_response
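Net effect of the result.py changes: `StreamedRunResult` becomes a thin wrapper over `AgentStream`, which now owns text streaming, output validation, usage and timestamps; `stream_output`, `stream_text`, and `get_output` all delegate to it. Delta-mode streaming, for example, still looks like this (note the docstring's caveat that output validators are skipped when `delta=True`):

    from pydantic_ai import Agent

    agent = Agent('openai:gpt-4o')  # model name illustrative

    async def main():
        async with agent.run_stream('Write a haiku.') as result:
            # delta=True yields only the newly received chunks
            async for chunk in result.stream_text(delta=True):
                print(chunk, end='', flush=True)
        print()
        print(result.usage())  # full usage is available once the stream finishes
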
--- pydantic_ai_slim-0.4.4/pyproject.toml
+++ pydantic_ai_slim-0.4.6/pyproject.toml
@@ -68,7 +68,7 @@ vertexai = ["google-auth>=2.36.0", "requests>=2.32.2"]
 google = ["google-genai>=1.24.0"]
 anthropic = ["anthropic>=0.52.0"]
 groq = ["groq>=0.19.0"]
-mistral = ["mistralai>=1.2.5"]
+mistral = ["mistralai>=1.9.2"]
 bedrock = ["boto3>=1.37.24"]
 huggingface = ["huggingface-hub[inference]>=0.33.2"]
 # Tools
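
The `mistralai` floor moves to 1.9.2, presumably because the PDF support added in models/mistral.py imports `DocumentURLChunk`, which older SDK releases may not export. Upgrading the extra picks this up, e.g. `pip install --upgrade 'pydantic-ai-slim[mistral]'`.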