pydantic-ai-slim 0.0.18__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


pydantic_ai/models/__init__.py (file paths below are inferred from the hunk contents):

```diff
@@ -7,20 +7,22 @@ specific LLM being used.
 from __future__ import annotations as _annotations
 
 from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator, Iterable, Iterator
+from collections.abc import AsyncIterator, Iterator
 from contextlib import asynccontextmanager, contextmanager
+from dataclasses import dataclass, field
 from datetime import datetime
 from functools import cache
-from typing import TYPE_CHECKING, Literal, Union
+from typing import TYPE_CHECKING, Literal
 
 import httpx
 
+from .._parts_manager import ModelResponsePartsManager
 from ..exceptions import UserError
-from ..messages import ModelMessage, ModelResponse
+from ..messages import ModelMessage, ModelResponse, ModelResponseStreamEvent
 from ..settings import ModelSettings
+from ..usage import Usage
 
 if TYPE_CHECKING:
-    from ..result import Usage
     from ..tools import ToolDefinition
 
 
```
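The visible effect of this import reshuffle is that `Usage` now comes from a dedicated `pydantic_ai.usage` module, imported at runtime, instead of being a type-checking-only import from `pydantic_ai.result`. A minimal downstream sketch of the move; whether `pydantic_ai.result` keeps a compatibility re-export is not visible in this diff:

```python
# 0.0.18: Usage lived in pydantic_ai.result
# from pydantic_ai.result import Usage

# 0.0.19: Usage has its own module
from pydantic_ai.usage import Usage

# Field names taken from the usage-mapping code later in this diff.
u = Usage(request_tokens=10, response_tokens=5, total_tokens=15)
```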
```diff
@@ -70,6 +72,7 @@ KnownModelName = Literal[
     'ollama:mistral-nemo',
     'ollama:mixtral',
     'ollama:phi3',
+    'ollama:phi4',
     'ollama:qwq',
     'ollama:qwen',
     'ollama:qwen2',
```
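`'ollama:phi4'` joins the `KnownModelName` literal, so type checkers now accept it wherever a known model name string is expected. A minimal sketch, assuming a running Ollama server with the phi4 model pulled:

```python
from pydantic_ai import Agent

# 'ollama:phi4' now type-checks as a KnownModelName.
agent = Agent('ollama:phi4')

result = agent.run_sync('What is the capital of France?')
print(result.data)
```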
```diff
@@ -129,88 +132,47 @@ class AgentModel(ABC):
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> AsyncIterator[EitherStreamedResponse]:
+    ) -> AsyncIterator[StreamedResponse]:
         """Make a request to the model and return a streaming response."""
+        # This method is not required, but you need to implement it if you want to support streamed responses
         raise NotImplementedError(f'Streamed requests not supported by this {self.__class__.__name__}')
         # yield is required to make this a generator for type checking
         # noinspection PyUnreachableCode
         yield  # pragma: no cover
 
 
-class StreamTextResponse(ABC):
-    """Streamed response from an LLM when returning text."""
-
-    def __aiter__(self) -> AsyncIterator[None]:
-        """Stream the response as an async iterable, building up the text as it goes.
-
-        This is an async iterator that yields `None` to avoid doing the work of validating the input and
-        extracting the text field when it will often be thrown away.
-        """
-        return self
-
-    @abstractmethod
-    async def __anext__(self) -> None:
-        """Process the next chunk of the response, see above for why this returns `None`."""
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get(self, *, final: bool = False) -> Iterable[str]:
-        """Returns an iterable of text since the last call to `get()` — e.g. the text delta.
-
-        Args:
-            final: If True, this is the final call, after iteration is complete, the response should be fully validated
-                and all text extracted.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def usage(self) -> Usage:
-        """Return the usage of the request.
-
-        NOTE: this won't return the full usage until the stream is finished.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def timestamp(self) -> datetime:
-        """Get the timestamp of the response."""
-        raise NotImplementedError()
-
-
-class StreamStructuredResponse(ABC):
+@dataclass
+class StreamedResponse(ABC):
     """Streamed response from an LLM when calling a tool."""
 
-    def __aiter__(self) -> AsyncIterator[None]:
-        """Stream the response as an async iterable, building up the tool call as it goes.
+    _usage: Usage = field(default_factory=Usage, init=False)
+    _parts_manager: ModelResponsePartsManager = field(default_factory=ModelResponsePartsManager, init=False)
+    _event_iterator: AsyncIterator[ModelResponseStreamEvent] | None = field(default=None, init=False)
 
-        This is an async iterator that yields `None` to avoid doing the work of building the final tool call when
-        it will often be thrown away.
-        """
-        return self
+    def __aiter__(self) -> AsyncIterator[ModelResponseStreamEvent]:
+        """Stream the response as an async iterable of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s."""
+        if self._event_iterator is None:
+            self._event_iterator = self._get_event_iterator()
+        return self._event_iterator
 
     @abstractmethod
-    async def __anext__(self) -> None:
-        """Process the next chunk of the response, see above for why this returns `None`."""
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get(self, *, final: bool = False) -> ModelResponse:
-        """Get the `ModelResponse` at this point.
-
-        The `ModelResponse` may or may not be complete, depending on whether the stream is finished.
+    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+        """Return an async iterator of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s.
 
-        Args:
-            final: If True, this is the final call, after iteration is complete, the response should be fully validated.
+        This method should be implemented by subclasses to translate the vendor-specific stream of events into
+        pydantic_ai-format events.
         """
         raise NotImplementedError()
+        # noinspection PyUnreachableCode
+        yield
 
-    @abstractmethod
-    def usage(self) -> Usage:
-        """Get the usage of the request.
+    def get(self) -> ModelResponse:
+        """Build a [`ModelResponse`][pydantic_ai.messages.ModelResponse] from the data received from the stream so far."""
+        return ModelResponse(parts=self._parts_manager.get_parts(), timestamp=self.timestamp())
 
-        NOTE: this won't return the full usage until the stream is finished.
-        """
-        raise NotImplementedError()
+    def usage(self) -> Usage:
+        """Get the usage of the response so far. This will not be the final usage until the stream is exhausted."""
+        return self._usage
 
     @abstractmethod
     def timestamp(self) -> datetime:
```
```diff
@@ -218,9 +180,6 @@ class StreamStructuredResponse(ABC):
         raise NotImplementedError()
 
 
-EitherStreamedResponse = Union[StreamTextResponse, StreamStructuredResponse]
-
-
 ALLOW_MODEL_REQUESTS = True
 """Whether to allow requests to models.
 
```
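This is the heart of the release: the `StreamTextResponse`/`StreamStructuredResponse` pair and the `EitherStreamedResponse` union collapse into a single `StreamedResponse` ABC. Subclasses implement only `_get_event_iterator()` and `timestamp()`, push vendor deltas through the shared `_parts_manager`, and inherit `__aiter__`, `get()`, and `usage()`. A hypothetical minimal subclass to illustrate the contract (the class name and chunk source are invented; the `handle_text_delta` call mirrors `FunctionStreamedResponse` below):

```python
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from datetime import datetime, timezone

from pydantic_ai.messages import ModelResponseStreamEvent
from pydantic_ai.models import StreamedResponse


@dataclass
class CannedStreamedResponse(StreamedResponse):
    """Hypothetical StreamedResponse that replays a fixed list of text chunks."""

    _chunks: list[str]
    _timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        for chunk in self._chunks:
            # The inherited parts manager merges deltas into response parts
            # and emits the corresponding stream event for each one.
            yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=chunk)

    def timestamp(self) -> datetime:
        return self._timestamp
```

Iterating the instance yields events, while `get()` and `usage()` can be called at any point for a snapshot `ModelResponse` and the running `Usage`.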
pydantic_ai/models/anthropic.py:

```diff
@@ -8,7 +8,7 @@ from typing import Any, Literal, Union, cast, overload
 from httpx import AsyncClient as AsyncHTTPClient
 from typing_extensions import assert_never
 
-from .. import result
+from .. import usage
 from .._utils import guard_tool_call_id as _guard_tool_call_id
 from ..messages import (
     ArgsDict,
```
```diff
@@ -27,8 +27,8 @@ from ..settings import ModelSettings
 from ..tools import ToolDefinition
 from . import (
     AgentModel,
-    EitherStreamedResponse,
     Model,
+    StreamedResponse,
     cached_async_http_client,
     check_allow_model_requests,
 )
```
```diff
@@ -158,14 +158,14 @@ class AnthropicAgentModel(AgentModel):
 
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, result.Usage]:
+    ) -> tuple[ModelResponse, usage.Usage]:
         response = await self._messages_create(messages, False, model_settings)
         return self._process_response(response), _map_usage(response)
 
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> AsyncIterator[EitherStreamedResponse]:
+    ) -> AsyncIterator[StreamedResponse]:
         response = await self._messages_create(messages, True, model_settings)
         async with response:
             yield await self._process_streamed_response(response)
```
```diff
@@ -216,28 +216,28 @@ class AnthropicAgentModel(AgentModel):
         items: list[ModelResponsePart] = []
         for item in response.content:
             if isinstance(item, TextBlock):
-                items.append(TextPart(item.text))
+                items.append(TextPart(content=item.text))
             else:
                 assert isinstance(item, ToolUseBlock), 'unexpected item type'
                 items.append(
                     ToolCallPart.from_raw_args(
-                        item.name,
-                        cast(dict[str, Any], item.input),
-                        item.id,
+                        tool_name=item.name,
+                        args=cast(dict[str, Any], item.input),
+                        tool_call_id=item.id,
                     )
                 )
 
         return ModelResponse(items)
 
     @staticmethod
-    async def _process_streamed_response(response: AsyncStream[RawMessageStreamEvent]) -> EitherStreamedResponse:
+    async def _process_streamed_response(response: AsyncStream[RawMessageStreamEvent]) -> StreamedResponse:
         """TODO: Process a streamed response, and prepare a streaming response to return."""
         # We don't yet support streamed responses from Anthropic, so we raise an error here for now.
         # Streamed responses will be supported in a future release.
 
         raise RuntimeError('Streamed responses are not yet supported for Anthropic models.')
 
-        # Should be returning some sort of AnthropicStreamTextResponse or AnthropicStreamStructuredResponse
+        # Should be returning some sort of AnthropicStreamTextResponse or AnthropicStreamedResponse
         # depending on the type of chunk we get, but we need to establish how we handle (and when we get) the following:
         # RawMessageStartEvent
         # RawMessageDeltaEvent
```
```diff
@@ -315,30 +315,30 @@ def _map_tool_call(t: ToolCallPart) -> ToolUseBlockParam:
     )
 
 
-def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> result.Usage:
+def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> usage.Usage:
     if isinstance(message, AnthropicMessage):
-        usage = message.usage
+        response_usage = message.usage
     else:
         if isinstance(message, RawMessageStartEvent):
-            usage = message.message.usage
+            response_usage = message.message.usage
         elif isinstance(message, RawMessageDeltaEvent):
-            usage = message.usage
+            response_usage = message.usage
         else:
             # No usage information provided in:
             # - RawMessageStopEvent
             # - RawContentBlockStartEvent
             # - RawContentBlockDeltaEvent
             # - RawContentBlockStopEvent
-            usage = None
+            response_usage = None
 
-    if usage is None:
-        return result.Usage()
+    if response_usage is None:
+        return usage.Usage()
 
-    request_tokens = getattr(usage, 'input_tokens', None)
+    request_tokens = getattr(response_usage, 'input_tokens', None)
 
-    return result.Usage(
+    return usage.Usage(
         # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence this getattr
         request_tokens=request_tokens,
-        response_tokens=usage.output_tokens,
-        total_tokens=(request_tokens or 0) + usage.output_tokens,
+        response_tokens=response_usage.output_tokens,
+        total_tokens=(request_tokens or 0) + response_usage.output_tokens,
     )
```
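The rename of the local `usage` to `response_usage` is not cosmetic: once the module is imported via `from .. import usage`, assigning to `usage` anywhere inside `_map_usage` would make it a local for the entire function body and shadow the module. A stripped-down illustration of the hazard (the `message` shape is schematic):

```python
from pydantic_ai import usage  # the module introduced in this release


def map_usage_broken(message):
    # Assignment makes `usage` a local name for the whole function,
    # shadowing the imported module.
    usage = getattr(message, 'usage', None)
    if usage is None:
        return usage.Usage()  # AttributeError: 'NoneType' has no attribute 'Usage'


def map_usage_fixed(message):
    response_usage = getattr(message, 'usage', None)  # distinct name
    if response_usage is None:
        return usage.Usage()  # the module is still reachable
```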
pydantic_ai/models/function.py:

```diff
@@ -7,16 +7,17 @@ from contextlib import asynccontextmanager
 from dataclasses import dataclass, field, replace
 from datetime import datetime
 from itertools import chain
-from typing import Callable, Union, cast
+from typing import Callable, Union
 
 from typing_extensions import TypeAlias, assert_never, overload
 
-from .. import _utils, result
+from .. import _utils, usage
+from .._utils import PeekableAsyncStream
 from ..messages import (
     ModelMessage,
     ModelRequest,
     ModelResponse,
-    ModelResponsePart,
+    ModelResponseStreamEvent,
     RetryPromptPart,
     SystemPromptPart,
     TextPart,
```
```diff
@@ -26,7 +27,7 @@ from ..messages import (
 )
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
-from . import AgentModel, EitherStreamedResponse, Model, StreamStructuredResponse, StreamTextResponse
+from . import AgentModel, Model, StreamedResponse
 
 
 @dataclass(init=False)
```
```diff
@@ -142,7 +143,7 @@ class FunctionAgentModel(AgentModel):
 
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, result.Usage]:
+    ) -> tuple[ModelResponse, usage.Usage]:
         agent_info = replace(self.agent_info, model_settings=model_settings)
 
         assert self.function is not None, 'FunctionModel must receive a `function` to support non-streamed requests'
```
```diff
@@ -158,90 +159,55 @@ class FunctionAgentModel(AgentModel):
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> AsyncIterator[EitherStreamedResponse]:
+    ) -> AsyncIterator[StreamedResponse]:
         assert (
             self.stream_function is not None
         ), 'FunctionModel must receive a `stream_function` to support streamed requests'
-        response_stream = self.stream_function(messages, self.agent_info)
-        try:
-            first = await response_stream.__anext__()
-        except StopAsyncIteration as e:
-            raise ValueError('Stream function must return at least one item') from e
-
-        if isinstance(first, str):
-            text_stream = cast(AsyncIterator[str], response_stream)
-            yield FunctionStreamTextResponse(first, text_stream)
-        else:
-            structured_stream = cast(AsyncIterator[DeltaToolCalls], response_stream)
-            yield FunctionStreamStructuredResponse(first, structured_stream)
-
-
-@dataclass
-class FunctionStreamTextResponse(StreamTextResponse):
-    """Implementation of `StreamTextResponse` for [FunctionModel][pydantic_ai.models.function.FunctionModel]."""
-
-    _next: str | None
-    _iter: AsyncIterator[str]
-    _timestamp: datetime = field(default_factory=_utils.now_utc, init=False)
-    _buffer: list[str] = field(default_factory=list, init=False)
-
-    async def __anext__(self) -> None:
-        if self._next is not None:
-            self._buffer.append(self._next)
-            self._next = None
-        else:
-            self._buffer.append(await self._iter.__anext__())
-
-    def get(self, *, final: bool = False) -> Iterable[str]:
-        yield from self._buffer
-        self._buffer.clear()
+        response_stream = PeekableAsyncStream(self.stream_function(messages, self.agent_info))
 
-    def usage(self) -> result.Usage:
-        return result.Usage()
+        first = await response_stream.peek()
+        if isinstance(first, _utils.Unset):
+            raise ValueError('Stream function must return at least one item')
 
-    def timestamp(self) -> datetime:
-        return self._timestamp
+        yield FunctionStreamedResponse(response_stream)
 
 
 @dataclass
-class FunctionStreamStructuredResponse(StreamStructuredResponse):
-    """Implementation of `StreamStructuredResponse` for [FunctionModel][pydantic_ai.models.function.FunctionModel]."""
+class FunctionStreamedResponse(StreamedResponse):
+    """Implementation of `StreamedResponse` for [FunctionModel][pydantic_ai.models.function.FunctionModel]."""
 
-    _next: DeltaToolCalls | None
-    _iter: AsyncIterator[DeltaToolCalls]
-    _delta_tool_calls: dict[int, DeltaToolCall] = field(default_factory=dict)
+    _iter: AsyncIterator[str | DeltaToolCalls]
     _timestamp: datetime = field(default_factory=_utils.now_utc)
 
-    async def __anext__(self) -> None:
-        if self._next is not None:
-            tool_call = self._next
-            self._next = None
-        else:
-            tool_call = await self._iter.__anext__()
+    def __post_init__(self):
+        self._usage += _estimate_usage([])
 
-        for key, new in tool_call.items():
-            if current := self._delta_tool_calls.get(key):
-                current.name = _utils.add_optional(current.name, new.name)
-                current.json_args = _utils.add_optional(current.json_args, new.json_args)
+    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+        async for item in self._iter:
+            if isinstance(item, str):
+                response_tokens = _estimate_string_tokens(item)
+                self._usage += usage.Usage(response_tokens=response_tokens, total_tokens=response_tokens)
+                yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=item)
             else:
-                self._delta_tool_calls[key] = new
-
-    def get(self, *, final: bool = False) -> ModelResponse:
-        calls: list[ModelResponsePart] = []
-        for c in self._delta_tool_calls.values():
-            if c.name is not None and c.json_args is not None:
-                calls.append(ToolCallPart.from_raw_args(c.name, c.json_args))
-
-        return ModelResponse(calls, timestamp=self._timestamp)
-
-    def usage(self) -> result.Usage:
-        return _estimate_usage([self.get()])
+                delta_tool_calls = item
+                for dtc_index, delta_tool_call in delta_tool_calls.items():
+                    if delta_tool_call.json_args:
+                        response_tokens = _estimate_string_tokens(delta_tool_call.json_args)
+                        self._usage += usage.Usage(response_tokens=response_tokens, total_tokens=response_tokens)
+                    maybe_event = self._parts_manager.handle_tool_call_delta(
+                        vendor_part_id=dtc_index,
+                        tool_name=delta_tool_call.name,
+                        args=delta_tool_call.json_args,
+                        tool_call_id=None,
+                    )
+                    if maybe_event is not None:
+                        yield maybe_event
 
     def timestamp(self) -> datetime:
         return self._timestamp
 
 
-def _estimate_usage(messages: Iterable[ModelMessage]) -> result.Usage:
+def _estimate_usage(messages: Iterable[ModelMessage]) -> usage.Usage:
     """Very rough guesstimate of the token usage associated with a series of messages.
 
     This is designed to be used solely to give plausible numbers for testing!
 
```
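With `FunctionStreamedResponse` unified, a single `stream_function` may now yield a mix of text chunks and `DeltaToolCalls`, and consumers see one uniform event stream. A sketch of driving it end to end; the agent-level streaming API (`run_stream`, `stream_text`) is assumed from this release's public surface, and the prompt and chunking are invented:

```python
import asyncio
from collections.abc import AsyncIterator

from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage
from pydantic_ai.models.function import AgentInfo, FunctionModel


async def stream_model(messages: list[ModelMessage], info: AgentInfo) -> AsyncIterator[str]:
    # Each yielded string becomes a text delta, handled by
    # FunctionStreamedResponse via handle_text_delta above.
    for chunk in ('stream', 'ed ', 'response'):
        yield chunk


agent = Agent(FunctionModel(stream_function=stream_model))


async def main() -> None:
    async with agent.run_stream('demo prompt') as result:
        async for text in result.stream_text():
            print(text)


asyncio.run(main())
```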
```diff
@@ -253,28 +219,30 @@ def _estimate_usage(messages: Iterable[ModelMessage]) -> result.Usage:
         if isinstance(message, ModelRequest):
             for part in message.parts:
                 if isinstance(part, (SystemPromptPart, UserPromptPart)):
-                    request_tokens += _estimate_string_usage(part.content)
+                    request_tokens += _estimate_string_tokens(part.content)
                 elif isinstance(part, ToolReturnPart):
-                    request_tokens += _estimate_string_usage(part.model_response_str())
+                    request_tokens += _estimate_string_tokens(part.model_response_str())
                 elif isinstance(part, RetryPromptPart):
-                    request_tokens += _estimate_string_usage(part.model_response())
+                    request_tokens += _estimate_string_tokens(part.model_response())
                 else:
                     assert_never(part)
         elif isinstance(message, ModelResponse):
             for part in message.parts:
                 if isinstance(part, TextPart):
-                    response_tokens += _estimate_string_usage(part.content)
+                    response_tokens += _estimate_string_tokens(part.content)
                 elif isinstance(part, ToolCallPart):
                     call = part
-                    response_tokens += 1 + _estimate_string_usage(call.args_as_json_str())
+                    response_tokens += 1 + _estimate_string_tokens(call.args_as_json_str())
                 else:
                     assert_never(part)
         else:
             assert_never(message)
-    return result.Usage(
+    return usage.Usage(
         request_tokens=request_tokens, response_tokens=response_tokens, total_tokens=request_tokens + response_tokens
     )
 
 
-def _estimate_string_usage(content: str) -> int:
-    return len(re.split(r'[\s",.:]+', content))
+def _estimate_string_tokens(content: str) -> int:
+    if not content:
+        return 0
+    return len(re.split(r'[\s",.:]+', content.strip()))
```
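Beyond the rename, `_estimate_string_tokens` now guards empty input and strips before splitting, which removes the phantom tokens created by `re.split` returning empty strings at the boundaries. The new behaviour can be checked directly with the stdlib:

```python
import re


def _estimate_string_tokens(content: str) -> int:
    if not content:
        return 0
    return len(re.split(r'[\s",.:]+', content.strip()))


assert _estimate_string_tokens('') == 0             # the old version counted 1
assert _estimate_string_tokens('hello world') == 2
# The old version counted 4 here: re.split yields '' at both ends
# when the string starts or ends with a separator character.
assert _estimate_string_tokens('  hello world  ') == 2
```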