pydantic-ai-slim 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydantic_ai/_griffe.py +23 -4
- pydantic_ai/_parts_manager.py +239 -0
- pydantic_ai/_pydantic.py +16 -3
- pydantic_ai/_system_prompt.py +1 -0
- pydantic_ai/_utils.py +80 -17
- pydantic_ai/agent.py +332 -124
- pydantic_ai/format_as_xml.py +2 -1
- pydantic_ai/messages.py +224 -9
- pydantic_ai/models/__init__.py +59 -82
- pydantic_ai/models/anthropic.py +22 -22
- pydantic_ai/models/function.py +47 -79
- pydantic_ai/models/gemini.py +86 -125
- pydantic_ai/models/groq.py +53 -125
- pydantic_ai/models/mistral.py +75 -137
- pydantic_ai/models/ollama.py +1 -0
- pydantic_ai/models/openai.py +50 -125
- pydantic_ai/models/test.py +40 -73
- pydantic_ai/models/vertexai.py +1 -1
- pydantic_ai/result.py +91 -92
- pydantic_ai/tools.py +24 -5
- {pydantic_ai_slim-0.0.17.dist-info → pydantic_ai_slim-0.0.19.dist-info}/METADATA +3 -1
- pydantic_ai_slim-0.0.19.dist-info/RECORD +29 -0
- pydantic_ai_slim-0.0.17.dist-info/RECORD +0 -28
- {pydantic_ai_slim-0.0.17.dist-info → pydantic_ai_slim-0.0.19.dist-info}/WHEEL +0 -0
pydantic_ai/models/function.py
CHANGED
@@ -7,16 +7,17 @@ from contextlib import asynccontextmanager
 from dataclasses import dataclass, field, replace
 from datetime import datetime
 from itertools import chain
-from typing import Callable, Union, cast
+from typing import Callable, Union
 
 from typing_extensions import TypeAlias, assert_never, overload
 
-from .. import _utils, result
+from .. import _utils, usage
+from .._utils import PeekableAsyncStream
 from ..messages import (
     ModelMessage,
     ModelRequest,
     ModelResponse,
-    ModelResponsePart,
+    ModelResponseStreamEvent,
     RetryPromptPart,
     SystemPromptPart,
     TextPart,
@@ -26,7 +27,7 @@ from ..messages import (
 )
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
-from . import AgentModel, EitherStreamedResponse, Model, StreamStructuredResponse, StreamTextResponse
+from . import AgentModel, Model, StreamedResponse
 
 
 @dataclass(init=False)
@@ -142,7 +143,7 @@ class FunctionAgentModel(AgentModel):
 
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, result.Usage]:
+    ) -> tuple[ModelResponse, usage.Usage]:
         agent_info = replace(self.agent_info, model_settings=model_settings)
 
         assert self.function is not None, 'FunctionModel must receive a `function` to support non-streamed requests'
@@ -158,90 +159,55 @@ class FunctionAgentModel(AgentModel):
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> AsyncIterator[EitherStreamedResponse]:
+    ) -> AsyncIterator[StreamedResponse]:
         assert (
             self.stream_function is not None
         ), 'FunctionModel must receive a `stream_function` to support streamed requests'
-        response_stream = self.stream_function(messages, self.agent_info)
-        try:
-            first = await response_stream.__anext__()
-        except StopAsyncIteration as e:
-            raise ValueError('Stream function must return at least one item') from e
-
-        if isinstance(first, str):
-            text_stream = cast(AsyncIterator[str], response_stream)
-            yield FunctionStreamTextResponse(first, text_stream)
-        else:
-            structured_stream = cast(AsyncIterator[DeltaToolCalls], response_stream)
-            yield FunctionStreamStructuredResponse(first, structured_stream)
-
-
-@dataclass
-class FunctionStreamTextResponse(StreamTextResponse):
-    """Implementation of `StreamTextResponse` for [FunctionModel][pydantic_ai.models.function.FunctionModel]."""
-
-    _next: str | None
-    _iter: AsyncIterator[str]
-    _timestamp: datetime = field(default_factory=_utils.now_utc, init=False)
-    _buffer: list[str] = field(default_factory=list, init=False)
-
-    async def __anext__(self) -> None:
-        if self._next is not None:
-            self._buffer.append(self._next)
-            self._next = None
-        else:
-            self._buffer.append(await self._iter.__anext__())
-
-    def get(self, *, final: bool = False) -> Iterable[str]:
-        yield from self._buffer
-        self._buffer.clear()
+        response_stream = PeekableAsyncStream(self.stream_function(messages, self.agent_info))
 
-    def usage(self) -> result.Usage:
-        return _estimate_usage([])
+        first = await response_stream.peek()
+        if isinstance(first, _utils.Unset):
+            raise ValueError('Stream function must return at least one item')
 
-    def timestamp(self) -> datetime:
-        return self._timestamp
+        yield FunctionStreamedResponse(response_stream)
 
 
 @dataclass
-class FunctionStreamStructuredResponse(StreamStructuredResponse):
-    """Implementation of `StreamStructuredResponse` for [FunctionModel][pydantic_ai.models.function.FunctionModel]."""
+class FunctionStreamedResponse(StreamedResponse):
+    """Implementation of `StreamedResponse` for [FunctionModel][pydantic_ai.models.function.FunctionModel]."""
 
-    _next: DeltaToolCalls | None
-    _iter: AsyncIterator[DeltaToolCalls]
-    _delta_tool_calls: dict[int, DeltaToolCall] = field(default_factory=dict)
+    _iter: AsyncIterator[str | DeltaToolCalls]
     _timestamp: datetime = field(default_factory=_utils.now_utc)
 
-    async def __anext__(self) -> None:
-        if self._next is not None:
-            tool_call = self._next
-            self._next = None
-        else:
-            tool_call = await self._iter.__anext__()
+    def __post_init__(self):
+        self._usage += _estimate_usage([])
 
-        for key, new in tool_call.items():
-            if current := self._delta_tool_calls.get(key):
-                current.name = current.name or new.name
-                current.json_args = (current.json_args or '') + (new.json_args or '')
+    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+        async for item in self._iter:
+            if isinstance(item, str):
+                response_tokens = _estimate_string_tokens(item)
+                self._usage += usage.Usage(response_tokens=response_tokens, total_tokens=response_tokens)
+                yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=item)
             else:
-                self._delta_tool_calls[key] = new
-
-    def get(self, *, final: bool = False) -> ModelResponse:
-        calls: list[ModelResponsePart] = []
-        for c in self._delta_tool_calls.values():
-            if c.name is not None and c.json_args is not None:
-                calls.append(ToolCallPart.from_raw_args(c.name, c.json_args))
-
-        return ModelResponse(calls, timestamp=self._timestamp)
-
-    def usage(self) -> result.Usage:
-        return _estimate_usage([])
+                delta_tool_calls = item
+                for dtc_index, delta_tool_call in delta_tool_calls.items():
+                    if delta_tool_call.json_args:
+                        response_tokens = _estimate_string_tokens(delta_tool_call.json_args)
+                        self._usage += usage.Usage(response_tokens=response_tokens, total_tokens=response_tokens)
+                    maybe_event = self._parts_manager.handle_tool_call_delta(
+                        vendor_part_id=dtc_index,
+                        tool_name=delta_tool_call.name,
+                        args=delta_tool_call.json_args,
+                        tool_call_id=None,
+                    )
+                    if maybe_event is not None:
+                        yield maybe_event
 
     def timestamp(self) -> datetime:
         return self._timestamp
 
 
-def _estimate_usage(messages: Iterable[ModelMessage]) -> result.Usage:
+def _estimate_usage(messages: Iterable[ModelMessage]) -> usage.Usage:
     """Very rough guesstimate of the token usage associated with a series of messages.
 
     This is designed to be used solely to give plausible numbers for testing!
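The key move in this hunk is replacing the old try/`__anext__`/StopAsyncIteration dance with `_utils.PeekableAsyncStream`, so `request_stream` can confirm the stream function yields at least one item and still hand the unconsumed stream to `FunctionStreamedResponse`. A minimal standalone sketch of that peek-without-consuming pattern (the class and the `Unset` sentinel are illustrative stand-ins for the pydantic_ai._utils helpers, not the library's implementation):

from __future__ import annotations

from collections.abc import AsyncIterator
from typing import Generic, TypeVar

T = TypeVar('T')


class Unset:
    """Sentinel returned by `peek` when the underlying stream is exhausted."""


class PeekableAsyncStream(Generic[T]):
    """Wrap an async iterator so the next item can be inspected without consuming it."""

    def __init__(self, source: AsyncIterator[T]):
        self._source = source
        self._peeked: T | Unset = Unset()

    async def peek(self) -> T | Unset:
        # Pull one item ahead of the consumer and cache it until __anext__ is called.
        if isinstance(self._peeked, Unset):
            try:
                self._peeked = await self._source.__anext__()
            except StopAsyncIteration:
                return Unset()
        return self._peeked

    def __aiter__(self) -> PeekableAsyncStream[T]:
        return self

    async def __anext__(self) -> T:
        # Hand back the cached item first, then fall through to the source.
        if not isinstance(self._peeked, Unset):
            item, self._peeked = self._peeked, Unset()
            return item
        return await self._source.__anext__()

With a wrapper like this, `await stream.peek()` surfaces either the first item or an `Unset` sentinel for an empty stream, and iterating the wrapper afterwards still starts from that first item.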
@@ -253,28 +219,30 @@ def _estimate_usage(messages: Iterable[ModelMessage]) -> result.Usage:
         if isinstance(message, ModelRequest):
             for part in message.parts:
                 if isinstance(part, (SystemPromptPart, UserPromptPart)):
-                    request_tokens += _string_usage(part.content)
+                    request_tokens += _estimate_string_tokens(part.content)
                 elif isinstance(part, ToolReturnPart):
-                    request_tokens += _string_usage(part.model_response_str())
+                    request_tokens += _estimate_string_tokens(part.model_response_str())
                 elif isinstance(part, RetryPromptPart):
-                    request_tokens += _string_usage(part.model_response())
+                    request_tokens += _estimate_string_tokens(part.model_response())
                 else:
                     assert_never(part)
         elif isinstance(message, ModelResponse):
             for part in message.parts:
                 if isinstance(part, TextPart):
-                    response_tokens += _string_usage(part.content)
+                    response_tokens += _estimate_string_tokens(part.content)
                 elif isinstance(part, ToolCallPart):
                     call = part
-                    response_tokens += 1 + _string_usage(call.args_as_json_str())
+                    response_tokens += 1 + _estimate_string_tokens(call.args_as_json_str())
                 else:
                     assert_never(part)
         else:
             assert_never(message)
-    return result.Usage(
+    return usage.Usage(
         request_tokens=request_tokens, response_tokens=response_tokens, total_tokens=request_tokens + response_tokens
     )
 
 
-def _string_usage(content: str) -> int:
-    return len(re.split(r'[\s",.:]+', content.strip()))
+def _estimate_string_tokens(content: str) -> int:
+    if not content:
+        return 0
+    return len(re.split(r'[\s",.:]+', content.strip()))
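The new `_estimate_string_tokens` helper consolidates the counting the old code did per call site: split on runs of whitespace plus a few JSON-ish punctuation characters and count the pieces. As the docstring above says, it is only meant to produce plausible numbers for tests. A quick standalone check of the behavior (the example strings are arbitrary):

import re


def _estimate_string_tokens(content: str) -> int:
    # Same heuristic as the helper added above: empty input is zero tokens,
    # anything else is split on runs of whitespace and of " , . : characters.
    if not content:
        return 0
    return len(re.split(r'[\s",.:]+', content.strip()))


assert _estimate_string_tokens('') == 0
assert _estimate_string_tokens('hello world') == 2
# JSON punctuation falls into the split class, so a small JSON object counts
# its braces and bare words rather than every character:
assert _estimate_string_tokens('{"city": "London"}') == 4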
pydantic_ai/models/gemini.py
CHANGED
@@ -2,24 +2,25 @@ from __future__ import annotations as _annotations
 
 import os
 import re
-from collections.abc import AsyncIterator, Iterable, Sequence
+from collections.abc import AsyncIterator, Sequence
 from contextlib import asynccontextmanager
 from copy import deepcopy
 from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Annotated, Any, Literal, Protocol, Union
+from uuid import uuid4
 
 import pydantic
-import pydantic_core
 from httpx import USE_CLIENT_DEFAULT, AsyncClient as AsyncHTTPClient, Response as HTTPResponse
-from typing_extensions import NotRequired, TypedDict, TypeGuard, assert_never
+from typing_extensions import NotRequired, TypedDict, assert_never
 
-from .. import UnexpectedModelBehavior, _utils, exceptions, result
+from .. import UnexpectedModelBehavior, _utils, exceptions, usage
 from ..messages import (
     ModelMessage,
     ModelRequest,
     ModelResponse,
     ModelResponsePart,
+    ModelResponseStreamEvent,
     RetryPromptPart,
     SystemPromptPart,
     TextPart,
@@ -31,10 +32,8 @@ from ..settings import ModelSettings
 from ..tools import ToolDefinition
 from . import (
     AgentModel,
-    EitherStreamedResponse,
     Model,
-    StreamStructuredResponse,
-    StreamTextResponse,
+    StreamedResponse,
     cached_async_http_client,
     check_allow_model_requests,
     get_user_agent,
@@ -111,7 +110,7 @@ class GeminiModel(Model):
         )
 
     def name(self) -> str:
-        return self.model_name
+        return f'google-gla:{self.model_name}'
 
 
 class AuthProtocol(Protocol):
@@ -171,7 +170,7 @@ class GeminiAgentModel(AgentModel):
 
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, result.Usage]:
+    ) -> tuple[ModelResponse, usage.Usage]:
         async with self._make_request(messages, False, model_settings) as http_response:
             response = _gemini_response_ta.validate_json(await http_response.aread())
             return self._process_response(response), _metadata_as_usage(response)
@@ -179,7 +178,7 @@
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> AsyncIterator[EitherStreamedResponse]:
+    ) -> AsyncIterator[StreamedResponse]:
         async with self._make_request(messages, True, model_settings) as http_response:
             yield await self._process_streamed_response(http_response)
 
@@ -238,7 +237,7 @@
         return _process_response_from_parts(parts)
 
     @staticmethod
-    async def _process_streamed_response(http_response: HTTPResponse) -> EitherStreamedResponse:
+    async def _process_streamed_response(http_response: HTTPResponse) -> StreamedResponse:
         """Process a streamed response, and prepare a streaming response to return."""
         aiter_bytes = http_response.aiter_bytes()
         start_response: _GeminiResponse | None = None
@@ -259,11 +258,7 @@
         if start_response is None:
             raise UnexpectedModelBehavior('Streamed response ended without content or tool calls')
 
-        # TODO: Update this once we rework streams to be more flexible
-        if _extract_response_parts(start_response).is_left():
-            return GeminiStreamStructuredResponse(_content=content, _stream=aiter_bytes)
-        else:
-            return GeminiStreamTextResponse(_json_content=content, _stream=aiter_bytes)
+        return GeminiStreamedResponse(_content=content, _stream=aiter_bytes)
 
     @classmethod
     def _message_to_gemini_content(
@@ -302,86 +297,69 @@ class GeminiAgentModel(AgentModel):
 
 
 @dataclass
-class GeminiStreamTextResponse(StreamTextResponse):
-    """Implementation of `StreamTextResponse` for the Gemini model."""
-
-    _json_content: bytearray
-    _stream: AsyncIterator[bytes]
-    _position: int = 0
-    _timestamp: datetime = field(default_factory=_utils.now_utc, init=False)
-    _usage: result.Usage = field(default_factory=result.Usage, init=False)
-
-    async def __anext__(self) -> None:
-        chunk = await self._stream.__anext__()
-        self._json_content.extend(chunk)
-
-    def get(self, *, final: bool = False) -> Iterable[str]:
-        if final:
-            all_items = pydantic_core.from_json(self._json_content)
-            new_items = all_items[self._position :]
-            self._position = len(all_items)
-            new_responses = _gemini_streamed_response_ta.validate_python(new_items)
-        else:
-            all_items = pydantic_core.from_json(self._json_content, allow_partial=True)
-            new_items = all_items[self._position : -1]
-            self._position = len(all_items) - 1
-            new_responses = _gemini_streamed_response_ta.validate_python(
-                new_items, experimental_allow_partial='trailing-strings'
-            )
-        for r in new_responses:
-            self._usage += _metadata_as_usage(r)
-            parts = r['candidates'][0]['content']['parts']
-            if _all_text_parts(parts):
-                for part in parts:
-                    yield part['text']
-            else:
-                raise UnexpectedModelBehavior(
-                    'Streamed response with unexpected content, expected all parts to be text'
-                )
-
-    def usage(self) -> result.Usage:
-        return self._usage
-
-    def timestamp(self) -> datetime:
-        return self._timestamp
-
-
-@dataclass
-class GeminiStreamStructuredResponse(StreamStructuredResponse):
-    """Implementation of `StreamStructuredResponse` for the Gemini model."""
+class GeminiStreamedResponse(StreamedResponse):
+    """Implementation of `StreamedResponse` for the Gemini model."""
 
     _content: bytearray
     _stream: AsyncIterator[bytes]
     _timestamp: datetime = field(default_factory=_utils.now_utc, init=False)
-    _usage: result.Usage = field(default_factory=result.Usage, init=False)
-
-    async def __anext__(self) -> None:
-        chunk = await self._stream.__anext__()
-        self._content.extend(chunk)
-
-    def get(self, *, final: bool = False) -> ModelResponse:
-        """Get the `ModelResponse` at this point.
 
-        NOTE: It's not clear how the stream of responses should be combined because Gemini
-        doesn't document this.
+    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+        async for gemini_response in self._get_gemini_responses():
+            candidate = gemini_response['candidates'][0]
+            gemini_part: _GeminiPartUnion
+            for gemini_part in candidate['content']['parts']:
+                if 'text' in gemini_part:
+                    # Using vendor_part_id=None means we can produce multiple text parts if their deltas are sprinkled
+                    # amongst the tool call deltas
+                    yield self._parts_manager.handle_text_delta(vendor_part_id=None, content=gemini_part['text'])
+
+                elif 'function_call' in gemini_part:
+                    # Here, we assume all function_call parts are complete and don't have deltas.
+                    # We do this by assigning a unique randomly generated "vendor_part_id".
+                    # We need to confirm whether this is actually true, but if it isn't, we can still handle it properly
+                    # it would just be a bit more complicated. And we'd need to confirm the intended semantics.
+                    maybe_event = self._parts_manager.handle_tool_call_delta(
+                        vendor_part_id=uuid4(),
+                        tool_name=gemini_part['function_call']['name'],
+                        args=gemini_part['function_call']['args'],
+                        tool_call_id=None,
+                    )
+                    if maybe_event is not None:
+                        yield maybe_event
+                else:
+                    assert 'function_response' in gemini_part, f'Unexpected part: {gemini_part}'
+
+    async def _get_gemini_responses(self) -> AsyncIterator[_GeminiResponse]:
+        # This method exists to ensure we only yield completed items, so we don't need to worry about
+        # partial gemini responses, which would make everything more complicated
+
+        gemini_responses: list[_GeminiResponse] = []
+        current_gemini_response_index = 0
+        # Right now, there are some circumstances where we will have information that could be yielded sooner than it is
+        # But changing that would make things a lot more complicated.
+        async for chunk in self._stream:
+            self._content.extend(chunk)
+
+            gemini_responses = _gemini_streamed_response_ta.validate_json(
+                self._content,
+                experimental_allow_partial='trailing-strings',
+            )
 
-        I'm therefore assuming that each part contains a complete tool call, and not trying to combine data
-        from separate parts.
-        """
-        responses = _gemini_streamed_response_ta.validate_json(
-            self._content,
-            experimental_allow_partial='off' if final else 'trailing-strings',
-        )
-        combined_parts: list[_GeminiPartUnion] = []
-        self._usage = result.Usage()
-        for r in responses:
+            # The idea: yield only up to the latest response, which might still be partial.
+            # Note that if the latest response is complete, we could yield it immediately, but there's not a good
+            # allow_partial API to determine if the last item in the list is complete.
+            responses_to_yield = gemini_responses[:-1]
+            for r in responses_to_yield[current_gemini_response_index:]:
+                current_gemini_response_index += 1
+                self._usage += _metadata_as_usage(r)
+                yield r
+
+        # Now yield the final response, which should be complete
+        if gemini_responses:
+            r = gemini_responses[-1]
             self._usage += _metadata_as_usage(r)
-            candidate = r['candidates'][0]
-            combined_parts.extend(candidate['content']['parts'])
-        return _process_response_from_parts(combined_parts, timestamp=self._timestamp)
-
-    def usage(self) -> result.Usage:
-        return self._usage
+            yield r
 
     def timestamp(self) -> datetime:
         return self._timestamp
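`_get_gemini_responses` above shows the buffering strategy: append every chunk to one bytearray, re-validate the whole buffer with `experimental_allow_partial='trailing-strings'`, and only yield items that can no longer change, i.e. everything before the last parsed element. The same idea can be sketched with `pydantic_core.from_json`, which the removed 0.0.17 code used; the item shape and chunk boundaries below are made up for the demo:

import asyncio
from collections.abc import AsyncIterator
from typing import Any

import pydantic_core


async def _fake_byte_stream() -> AsyncIterator[bytes]:
    # Hypothetical chunking of a streamed JSON array; real chunk boundaries are arbitrary.
    for chunk in (b'[{"text": "hel', b'lo"}, {"text": "wor', b'ld"}]'):
        yield chunk


async def parse_streamed_json_array(stream: AsyncIterator[bytes]) -> AsyncIterator[dict[str, Any]]:
    buffer = bytearray()
    yielded = 0
    async for chunk in stream:
        buffer.extend(chunk)
        # allow_partial tolerates a truncated tail, so the growing buffer always parses.
        items = pydantic_core.from_json(bytes(buffer), allow_partial=True)
        # The last item may still be incomplete, so only yield up to (but not including) it.
        for item in items[yielded : len(items) - 1]:
            yielded += 1
            yield item
    # After the stream ends the buffer is complete JSON; yield whatever remains.
    for item in pydantic_core.from_json(bytes(buffer))[yielded:]:
        yield item


async def main() -> None:
    async for item in parse_streamed_json_array(_fake_byte_stream()):
        print(item)


if __name__ == '__main__':
    asyncio.run(main())

Holding back the final element until the stream closes is the same conservative choice the comment in the hunk explains: there is no cheap way to ask the partial parser whether the last item is already complete.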
@@ -458,9 +436,14 @@ def _process_response_from_parts(parts: Sequence[_GeminiPartUnion], timestamp: d
     items: list[ModelResponsePart] = []
     for part in parts:
         if 'text' in part:
-            items.append(TextPart(part['text']))
+            items.append(TextPart(content=part['text']))
         elif 'function_call' in part:
-            items.append(ToolCallPart.from_raw_args(part['function_call']['name'], part['function_call']['args']))
+            items.append(
+                ToolCallPart.from_raw_args(
+                    tool_name=part['function_call']['name'],
+                    args=part['function_call']['args'],
+                )
+            )
         elif 'function_response' in part:
             raise exceptions.UnexpectedModelBehavior(
                 f'Unsupported response from Gemini, expected all parts to be function calls or text, got: {part!r}'
@@ -575,35 +558,6 @@ class _GeminiResponse(TypedDict):
     prompt_feedback: NotRequired[Annotated[_GeminiPromptFeedback, pydantic.Field(alias='promptFeedback')]]
 
 
-# TODO: Delete the next three functions once we've reworked streams to be more flexible
-def _extract_response_parts(
-    response: _GeminiResponse,
-) -> _utils.Either[list[_GeminiFunctionCallPart], list[_GeminiTextPart]]:
-    """Extract the parts of the response from the Gemini API.
-
-    Returns Either a list of function calls (Either.left) or a list of text parts (Either.right).
-    """
-    if len(response['candidates']) != 1:
-        raise UnexpectedModelBehavior('Expected exactly one candidate in Gemini response')
-    parts = response['candidates'][0]['content']['parts']
-    if _all_function_call_parts(parts):
-        return _utils.Either(left=parts)
-    elif _all_text_parts(parts):
-        return _utils.Either(right=parts)
-    else:
-        raise exceptions.UnexpectedModelBehavior(
-            f'Unsupported response from Gemini, expected all parts to be function calls or text, got: {parts!r}'
-        )
-
-
-def _all_function_call_parts(parts: list[_GeminiPartUnion]) -> TypeGuard[list[_GeminiFunctionCallPart]]:
-    return all('function_call' in part for part in parts)
-
-
-def _all_text_parts(parts: list[_GeminiPartUnion]) -> TypeGuard[list[_GeminiTextPart]]:
-    return all('text' in part for part in parts)
-
-
 class _GeminiCandidates(TypedDict):
     """See <https://ai.google.dev/api/generate-content#v1beta.Candidate>."""
 
@@ -630,14 +584,14 @@ class _GeminiUsageMetaData(TypedDict, total=False):
     cached_content_token_count: NotRequired[Annotated[int, pydantic.Field(alias='cachedContentTokenCount')]]
 
 
-def _metadata_as_usage(response: _GeminiResponse) -> result.Usage:
+def _metadata_as_usage(response: _GeminiResponse) -> usage.Usage:
     metadata = response.get('usage_metadata')
     if metadata is None:
-        return result.Usage()
+        return usage.Usage()
     details: dict[str, int] = {}
     if cached_content_token_count := metadata.get('cached_content_token_count'):
         details['cached_content_token_count'] = cached_content_token_count
-    return result.Usage(
+    return usage.Usage(
         request_tokens=metadata.get('prompt_token_count', 0),
         response_tokens=metadata.get('candidates_token_count', 0),
         total_tokens=metadata.get('total_token_count', 0),
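Aside from the result.Usage → usage.Usage rename, `_metadata_as_usage` is a useful pattern on its own: missing metadata collapses to an all-zero usage object rather than None, the three token counts default to 0, and the optional cached-content count is tucked into a details dict. A self-contained sketch with a stub dataclass standing in for pydantic_ai.usage.Usage (field names are copied from the hunk; passing `details` through is an assumption, since that line falls outside the hunk's context):

from dataclasses import dataclass, field


@dataclass
class Usage:
    # Reduced stand-in for pydantic_ai.usage.Usage.
    request_tokens: int = 0
    response_tokens: int = 0
    total_tokens: int = 0
    details: dict[str, int] = field(default_factory=dict)


def metadata_as_usage(metadata: dict[str, int] | None) -> Usage:
    # Absent metadata becomes a zero Usage, so callers can always sum usages.
    if metadata is None:
        return Usage()
    details: dict[str, int] = {}
    if cached_content_token_count := metadata.get('cached_content_token_count'):
        details['cached_content_token_count'] = cached_content_token_count
    return Usage(
        request_tokens=metadata.get('prompt_token_count', 0),
        response_tokens=metadata.get('candidates_token_count', 0),
        total_tokens=metadata.get('total_token_count', 0),
        details=details,
    )


assert metadata_as_usage(None).total_tokens == 0
assert metadata_as_usage({'prompt_token_count': 10, 'total_token_count': 10}).request_tokens == 10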
@@ -693,7 +647,7 @@ class _GeminiJsonSchema:
 
     def _simplify(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
         schema.pop('title', None)
-
+        schema.pop('default', None)
         if ref := schema.pop('$ref', None):
             # noinspection PyTypeChecker
             key = re.sub(r'^#/\$defs/', '', ref)
@@ -708,11 +662,12 @@ class _GeminiJsonSchema:
         if any_of := schema.get('anyOf'):
             for item_schema in any_of:
                 self._simplify(item_schema, refs_stack)
-            if len(any_of) == 2 and {'type': 'null'} in any_of:
+            if len(any_of) == 2 and {'type': 'null'} in any_of:
                 for item_schema in any_of:
                     if item_schema != {'type': 'null'}:
                         schema.clear()
                         schema.update(item_schema)
+                        schema['nullable'] = True
                 return
 
         type_ = schema.get('type')
@@ -721,6 +676,12 @@ class _GeminiJsonSchema:
             self._object(schema, refs_stack)
         elif type_ == 'array':
             return self._array(schema, refs_stack)
+        elif type_ == 'string' and (fmt := schema.pop('format', None)):
+            description = schema.get('description')
+            if description:
+                schema['description'] = f'{description} (format: {fmt})'
+            else:
+                schema['description'] = f'Format: {fmt}'
 
     def _object(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
         ad_props = schema.pop('additionalProperties', None)