pydantic-ai-slim 0.1.11__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of pydantic-ai-slim has been flagged as potentially problematic.
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/PKG-INFO +3 -3
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_agent_graph.py +6 -8
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_parts_manager.py +3 -1
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/agent.py +21 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/messages.py +7 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/__init__.py +6 -7
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/_json_schema.py +8 -2
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/anthropic.py +23 -26
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/bedrock.py +36 -12
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/cohere.py +5 -3
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/fallback.py +3 -4
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/function.py +9 -4
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/gemini.py +13 -5
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/groq.py +5 -3
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/instrumented.py +8 -9
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/mistral.py +5 -3
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/openai.py +9 -6
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/test.py +4 -3
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/wrapper.py +1 -2
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/usage.py +5 -3
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/.gitignore +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/README.md +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/__init__.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/__main__.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_cli.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_griffe.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_output.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_pydantic.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_system_prompt.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_utils.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/common_tools/__init__.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/common_tools/duckduckgo.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/common_tools/tavily.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/exceptions.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/format_as_xml.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/format_prompt.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/mcp.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/__init__.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/anthropic.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/azure.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/bedrock.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/cohere.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/deepseek.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/google_gla.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/google_vertex.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/groq.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/mistral.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/providers/openai.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/py.typed +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/result.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/settings.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/tools.py +0 -0
- {pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pyproject.toml +0 -0
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai-slim
-Version: 0.1.11
+Version: 0.2.0
 Summary: Agent Framework / shim to use Pydantic with LLMs, slim package
 Author-email: Samuel Colvin <samuel@pydantic.dev>, Marcelo Trylesinski <marcelotryle@gmail.com>, David Montague <david@pydantic.dev>, Alex Hall <alex@pydantic.dev>
 License-Expression: MIT
@@ -29,7 +29,7 @@ Requires-Dist: exceptiongroup; python_version < '3.11'
 Requires-Dist: griffe>=1.3.2
 Requires-Dist: httpx>=0.27
 Requires-Dist: opentelemetry-api>=1.28.0
-Requires-Dist: pydantic-graph==0.1.11
+Requires-Dist: pydantic-graph==0.2.0
 Requires-Dist: pydantic>=2.10
 Requires-Dist: typing-inspection>=0.4.0
 Provides-Extra: anthropic
@@ -45,7 +45,7 @@ Requires-Dist: cohere>=5.13.11; (platform_system != 'Emscripten') and extra == 'cohere'
 Provides-Extra: duckduckgo
 Requires-Dist: duckduckgo-search>=7.0.0; extra == 'duckduckgo'
 Provides-Extra: evals
-Requires-Dist: pydantic-evals==0.1.11; extra == 'evals'
+Requires-Dist: pydantic-evals==0.2.0; extra == 'evals'
 Provides-Extra: groq
 Requires-Dist: groq>=0.15.0; extra == 'groq'
 Provides-Extra: logfire
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_agent_graph.py

@@ -301,16 +301,15 @@ class ModelRequestNode(AgentNode[DepsT, NodeRunEndT]):
                 ctx.state.message_history, model_settings, model_request_parameters
             ) as streamed_response:
                 self._did_stream = True
-                ctx.state.usage.incr(_usage.Usage(), requests=1)
+                ctx.state.usage.requests += 1
                 yield streamed_response
                 # In case the user didn't manually consume the full stream, ensure it is fully consumed here,
                 # otherwise usage won't be properly counted:
                 async for _ in streamed_response:
                     pass
                 model_response = streamed_response.get()
-                request_usage = streamed_response.usage()

-            self._finish_handling(ctx, model_response, request_usage)
+            self._finish_handling(ctx, model_response)
             assert self._result is not None  # this should be set by the previous line

     async def _make_request(
@@ -321,12 +320,12 @@ class ModelRequestNode(AgentNode[DepsT, NodeRunEndT]):

         model_settings, model_request_parameters = await self._prepare_request(ctx)
         model_request_parameters = ctx.deps.model.customize_request_parameters(model_request_parameters)
-        model_response, request_usage = await ctx.deps.model.request(
+        model_response = await ctx.deps.model.request(
            ctx.state.message_history, model_settings, model_request_parameters
        )
-        ctx.state.usage.incr(_usage.Usage(), requests=1)
+        ctx.state.usage.incr(_usage.Usage())

-        return self._finish_handling(ctx, model_response, request_usage)
+        return self._finish_handling(ctx, model_response)

     async def _prepare_request(
         self, ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]]
@@ -348,10 +347,9 @@ class ModelRequestNode(AgentNode[DepsT, NodeRunEndT]):
         self,
         ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]],
         response: _messages.ModelResponse,
-        usage: _usage.Usage,
     ) -> CallToolsNode[DepsT, NodeRunEndT]:
         # Update usage
-        ctx.state.usage.incr(usage)
+        ctx.state.usage.incr(response.usage)
         if ctx.deps.usage_limits:
             ctx.deps.usage_limits.check_tokens(ctx.state.usage)

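Taken together, the graph changes above mean the per-request usage now arrives attached to the `ModelResponse` itself and is folded into the run total with `Usage.incr()`. A minimal sketch of that accounting flow, using only the `Usage` fields visible in this diff (the token counts are invented):

    from pydantic_ai.usage import Usage

    run_usage = Usage()
    run_usage.requests += 1  # streaming path: the request is counted as soon as the stream opens

    # usage reported by the model for this request, now carried on ModelResponse.usage
    response_usage = Usage(request_tokens=56, response_tokens=1, total_tokens=57)

    run_usage.incr(response_usage)  # what _finish_handling() now does with response.usage
    print(run_usage)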
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/_parts_manager.py

@@ -14,7 +14,7 @@ event-emitting logic.
 from __future__ import annotations as _annotations

 from collections.abc import Hashable
-from dataclasses import dataclass, field
+from dataclasses import dataclass, field, replace
 from typing import Any, Union

 from pydantic_ai.exceptions import UnexpectedModelBehavior
@@ -198,6 +198,8 @@ class ModelResponsePartsManager:
                 return PartStartEvent(index=part_index, part=updated_part)
             else:
                 # We updated an existing part, so emit a PartDeltaEvent
+                if updated_part.tool_call_id and not delta.tool_call_id:
+                    delta = replace(delta, tool_call_id=updated_part.tool_call_id)
                 return PartDeltaEvent(index=part_index, delta=delta)

     def handle_tool_call_part(
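The two added lines backfill a missing `tool_call_id` on an incoming delta from the part it updates, using `dataclasses.replace`. A standalone sketch of the same idea; `ToolCallDelta` here is a stand-in written for this example, not the library's `ToolCallPartDelta`:

    from __future__ import annotations

    from dataclasses import dataclass, replace


    @dataclass(frozen=True)
    class ToolCallDelta:
        # stand-in for pydantic_ai's ToolCallPartDelta, just for this sketch
        args_delta: str
        tool_call_id: str | None = None


    existing_tool_call_id = 'call_abc123'  # id already stored on the part being updated
    delta = ToolCallDelta(args_delta='{"city": "Par')

    # mirror of the new logic: if the stored part has an id but the incoming delta does not,
    # copy it onto the delta before emitting the PartDeltaEvent
    if existing_tool_call_id and not delta.tool_call_id:
        delta = replace(delta, tool_call_id=existing_tool_call_id)

    assert delta.tool_call_id == 'call_abc123'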
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/agent.py

@@ -551,6 +551,13 @@ class Agent(Generic[AgentDepsT, OutputDataT]):
                 CallToolsNode(
                     model_response=ModelResponse(
                         parts=[TextPart(content='Paris', part_kind='text')],
+                        usage=Usage(
+                            requests=1,
+                            request_tokens=56,
+                            response_tokens=1,
+                            total_tokens=57,
+                            details=None,
+                        ),
                         model_name='gpt-4o',
                         timestamp=datetime.datetime(...),
                         kind='response',
@@ -1715,6 +1722,13 @@ class AgentRun(Generic[AgentDepsT, OutputDataT]):
                 CallToolsNode(
                     model_response=ModelResponse(
                         parts=[TextPart(content='Paris', part_kind='text')],
+                        usage=Usage(
+                            requests=1,
+                            request_tokens=56,
+                            response_tokens=1,
+                            total_tokens=57,
+                            details=None,
+                        ),
                         model_name='gpt-4o',
                         timestamp=datetime.datetime(...),
                         kind='response',
@@ -1853,6 +1867,13 @@ class AgentRun(Generic[AgentDepsT, OutputDataT]):
                 CallToolsNode(
                     model_response=ModelResponse(
                         parts=[TextPart(content='Paris', part_kind='text')],
+                        usage=Usage(
+                            requests=1,
+                            request_tokens=56,
+                            response_tokens=1,
+                            total_tokens=57,
+                            details=None,
+                        ),
                         model_name='gpt-4o',
                         timestamp=datetime.datetime(...),
                         kind='response',
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/messages.py

@@ -14,6 +14,7 @@ from typing_extensions import TypeAlias

 from ._utils import generate_tool_call_id as _generate_tool_call_id, now_utc as _now_utc
 from .exceptions import UnexpectedModelBehavior
+from .usage import Usage

 AudioMediaType: TypeAlias = Literal['audio/wav', 'audio/mpeg']
 ImageMediaType: TypeAlias = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
@@ -554,6 +555,12 @@ class ModelResponse:
     parts: list[ModelResponsePart]
     """The parts of the model message."""

+    usage: Usage = field(default_factory=Usage)
+    """Usage information for the request.
+
+    This has a default to make tests easier, and to support loading old messages where usage will be missing.
+    """
+
     model_name: str | None = None
     """The name of the model that generated the response."""

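With this field in place, a `ModelResponse` can be built and inspected with its usage attached. A small example based on the constructor arguments shown in the updated `agent.py` docstrings above (the values are the same illustrative ones used there):

    from pydantic_ai.messages import ModelResponse, TextPart
    from pydantic_ai.usage import Usage

    response = ModelResponse(
        parts=[TextPart(content='Paris')],
        usage=Usage(requests=1, request_tokens=56, response_tokens=1, total_tokens=57),
        model_name='gpt-4o',
    )
    print(response.usage.total_tokens)  # 57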
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/__init__.py

@@ -12,7 +12,6 @@ from contextlib import asynccontextmanager, contextmanager
 from dataclasses import dataclass, field
 from datetime import datetime
 from functools import cache
-from typing import TYPE_CHECKING

 import httpx
 from typing_extensions import Literal, TypeAliasType
@@ -21,12 +20,9 @@ from .._parts_manager import ModelResponsePartsManager
 from ..exceptions import UserError
 from ..messages import ModelMessage, ModelRequest, ModelResponse, ModelResponseStreamEvent
 from ..settings import ModelSettings
+from ..tools import ToolDefinition
 from ..usage import Usage

-if TYPE_CHECKING:
-    from ..tools import ToolDefinition
-
-
 KnownModelName = TypeAliasType(
     'KnownModelName',
     Literal[
@@ -278,7 +274,7 @@ class Model(ABC):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, Usage]:
+    ) -> ModelResponse:
         """Make a request to the model."""
         raise NotImplementedError()

@@ -365,7 +361,10 @@ class StreamedResponse(ABC):
     def get(self) -> ModelResponse:
         """Build a [`ModelResponse`][pydantic_ai.messages.ModelResponse] from the data received from the stream so far."""
         return ModelResponse(
-            parts=self._parts_manager.get_parts(), model_name=self.model_name, timestamp=self.timestamp
+            parts=self._parts_manager.get_parts(),
+            model_name=self.model_name,
+            timestamp=self.timestamp,
+            usage=self.usage(),
         )

     def usage(self) -> Usage:
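Since `Model.request()` no longer returns a `(response, usage)` tuple, callers that still want the pair can unpack it from the response. A hedged sketch of such a shim; `request_with_usage` is a hypothetical helper for illustration, not part of pydantic-ai:

    from __future__ import annotations

    from typing import Any

    from pydantic_ai.messages import ModelResponse
    from pydantic_ai.models import Model
    from pydantic_ai.usage import Usage


    async def request_with_usage(model: Model, *args: Any, **kwargs: Any) -> tuple[ModelResponse, Usage]:
        # Model.request() now returns a single ModelResponse; the per-request usage that
        # used to be the second element of the old tuple lives on response.usage instead.
        response = await model.request(*args, **kwargs)
        return response, response.usage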
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/_json_schema.py

@@ -25,7 +25,7 @@ class WalkJsonSchema(ABC):
         self.simplify_nullable_unions = simplify_nullable_unions

         self.defs: dict[str, JsonSchema] = self.schema.get('$defs', {})
-        self.refs_stack = ()
+        self.refs_stack: list[str] = []
         self.recursive_refs = set[str]()

     @abstractmethod
@@ -62,13 +62,16 @@ class WalkJsonSchema(ABC):
         return handled

     def _handle(self, schema: JsonSchema) -> JsonSchema:
+        nested_refs = 0
         if self.prefer_inlined_defs:
             while ref := schema.get('$ref'):
                 key = re.sub(r'^#/\$defs/', '', ref)
                 if key in self.refs_stack:
                     self.recursive_refs.add(key)
                     break  # recursive ref can't be unpacked
-                self.refs_stack += (key,)
+                self.refs_stack.append(key)
+                nested_refs += 1
+
                 def_schema = self.defs.get(key)
                 if def_schema is None:  # pragma: no cover
                     raise UserError(f'Could not find $ref definition for {key}')
@@ -87,6 +90,9 @@ class WalkJsonSchema(ABC):
         # Apply the base transform
         schema = self.transform(schema)

+        if nested_refs > 0:
+            self.refs_stack = self.refs_stack[:-nested_refs]
+
         return schema

     def _handle_object(self, schema: JsonSchema) -> JsonSchema:
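The fix above pushes each inlined `$ref` onto `refs_stack` and pops it again once the definition has been handled, so a second, sibling use of the same definition is no longer misreported as recursive. A standalone sketch of that bookkeeping, using plain dicts and module-level state instead of the `WalkJsonSchema` class:

    import re

    defs = {'Foo': {'type': 'object'}}
    refs_stack: list[str] = []
    recursive_refs: set[str] = set()

    def handle(schema: dict) -> dict:
        nested_refs = 0
        while ref := schema.get('$ref'):
            key = re.sub(r'^#/\$defs/', '', ref)
            if key in refs_stack:
                recursive_refs.add(key)
                break  # a truly recursive ref can't be unpacked
            refs_stack.append(key)
            nested_refs += 1
            schema = defs[key]
        # ... transform `schema` here ...
        if nested_refs:
            del refs_stack[-nested_refs:]  # pop only what this call pushed
        return schema

    handle({'$ref': '#/$defs/Foo'})
    handle({'$ref': '#/$defs/Foo'})  # second sibling use of Foo: not flagged as recursive
    assert not recursive_refs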
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/anthropic.py

@@ -145,12 +145,14 @@ class AnthropicModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         check_allow_model_requests()
         response = await self._messages_create(
             messages, False, cast(AnthropicModelSettings, model_settings or {}), model_request_parameters
         )
-        return self._process_response(response), _map_usage(response)
+        model_response = self._process_response(response)
+        model_response.usage.requests = 1
+        return model_response

     @asynccontextmanager
     async def request_stream(
@@ -260,7 +262,7 @@ class AnthropicModel(Model):
             )
         )

-        return ModelResponse(items, model_name=response.model)
+        return ModelResponse(items, usage=_map_usage(response), model_name=response.model)

     async def _process_streamed_response(self, response: AsyncStream[RawMessageStreamEvent]) -> StreamedResponse:
         peekable_response = _utils.PeekableAsyncStream(response)
@@ -391,36 +393,31 @@
 def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> usage.Usage:
     if isinstance(message, AnthropicMessage):
         response_usage = message.usage
+    elif isinstance(message, RawMessageStartEvent):
+        response_usage = message.message.usage
+    elif isinstance(message, RawMessageDeltaEvent):
+        response_usage = message.usage
     else:
-        if isinstance(message, RawMessageStartEvent):
-            response_usage = message.message.usage
-        elif isinstance(message, RawMessageDeltaEvent):
-            response_usage = message.usage
-        else:
-            # No usage information provided in:
-            # - RawMessageStopEvent
-            # - RawContentBlockStartEvent
-            # - RawContentBlockDeltaEvent
-            # - RawContentBlockStopEvent
-            response_usage = None
-
-    if response_usage is None:
+        # No usage information provided in:
+        # - RawMessageStopEvent
+        # - RawContentBlockStartEvent
+        # - RawContentBlockDeltaEvent
+        # - RawContentBlockStopEvent
         return usage.Usage()

-    # Store all integer-typed usage values in the details
-    details: dict[str, int] = {}
-    for key, value in response_usage.model_dump().items():
-        if isinstance(value, int):
-            details[key] = value
+    # Store all integer-typed usage values in the details, except 'output_tokens' which is represented exactly by
+    # `response_tokens`
+    details: dict[str, int] = {
+        key: value for key, value in response_usage.model_dump().items() if isinstance(value, int)
+    }

-    # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence using `getattr`
+    # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence using `get`
     # Tokens are only counted once between input_tokens, cache_creation_input_tokens, and cache_read_input_tokens
     # This approach maintains request_tokens as the count of all input tokens, with cached counts as details
     request_tokens = (
-        getattr(response_usage, 'input_tokens', 0)
-        + (getattr(response_usage, 'cache_creation_input_tokens', 0) or 0)
-        + (getattr(response_usage, 'cache_read_input_tokens', 0) or 0)
+        details.get('input_tokens', 0)
+        + details.get('cache_creation_input_tokens', 0)
+        + details.get('cache_read_input_tokens', 0)
     )

     return usage.Usage(
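The reworked `_map_usage` keeps every integer field of Anthropic's usage object in `details` and derives `request_tokens` from fresh, cache-creation and cache-read input tokens, each counted once. A small worked example of that arithmetic with invented token counts:

    details = {
        'input_tokens': 10,
        'cache_creation_input_tokens': 200,
        'cache_read_input_tokens': 3000,
        'output_tokens': 50,
    }

    request_tokens = (
        details.get('input_tokens', 0)
        + details.get('cache_creation_input_tokens', 0)
        + details.get('cache_read_input_tokens', 0)
    )
    response_tokens = details.get('output_tokens', 0)
    print(request_tokens, response_tokens, request_tokens + response_tokens)  # 3210 50 3260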
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/bedrock.py

@@ -232,10 +232,12 @@ class BedrockConverseModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         settings = cast(BedrockModelSettings, model_settings or {})
         response = await self._messages_create(messages, False, settings, model_request_parameters)
-        return await self._process_response(response)
+        model_response = await self._process_response(response)
+        model_response.usage.requests = 1
+        return model_response

     @asynccontextmanager
     async def request_stream(
@@ -248,7 +250,7 @@ class BedrockConverseModel(Model):
         response = await self._messages_create(messages, True, settings, model_request_parameters)
         yield BedrockStreamedResponse(_model_name=self.model_name, _event_stream=response)

-    async def _process_response(self, response: ConverseResponseTypeDef) -> tuple[ModelResponse, usage.Usage]:
+    async def _process_response(self, response: ConverseResponseTypeDef) -> ModelResponse:
         items: list[ModelResponsePart] = []
         if message := response['output'].get('message'):
             for item in message['content']:
@@ -269,7 +271,7 @@ class BedrockConverseModel(Model):
             response_tokens=response['usage']['outputTokens'],
             total_tokens=response['usage']['totalTokens'],
         )
-        return ModelResponse(items, model_name=self.model_name)
+        return ModelResponse(items, usage=u, model_name=self.model_name)

     @overload
     async def _messages_create(
@@ -367,13 +369,16 @@ class BedrockConverseModel(Model):
     async def _map_messages(
         self, messages: list[ModelMessage]
     ) -> tuple[list[SystemContentBlockTypeDef], list[MessageUnionTypeDef]]:
-        """Just maps a `pydantic_ai.Message` to the Bedrock `MessageUnionTypeDef`."""
+        """Maps a `pydantic_ai.Message` to the Bedrock `MessageUnionTypeDef`.
+
+        Groups consecutive ToolReturnPart objects into a single user message as required by Bedrock Claude/Nova models.
+        """
         system_prompt: list[SystemContentBlockTypeDef] = []
         bedrock_messages: list[MessageUnionTypeDef] = []
         document_count: Iterator[int] = count(1)
-        for m in messages:
-            if isinstance(m, ModelRequest):
-                for part in m.parts:
+        for message in messages:
+            if isinstance(message, ModelRequest):
+                for part in message.parts:
                     if isinstance(part, SystemPromptPart):
                         system_prompt.append({'text': part.content})
                     elif isinstance(part, UserPromptPart):
@@ -414,9 +419,9 @@ class BedrockConverseModel(Model):
                         ],
                     }
                 )
-            elif isinstance(m, ModelResponse):
+            elif isinstance(message, ModelResponse):
                 content: list[ContentBlockOutputTypeDef] = []
-                for item in m.parts:
+                for item in message.parts:
                     if isinstance(item, TextPart):
                         content.append({'text': item.content})
                     else:
@@ -424,12 +429,31 @@ class BedrockConverseModel(Model):
                         content.append(self._map_tool_call(item))
                 bedrock_messages.append({'role': 'assistant', 'content': content})
             else:
-                assert_never(m)
+                assert_never(message)
+
+        # Merge together sequential user messages.
+        processed_messages: list[MessageUnionTypeDef] = []
+        last_message: dict[str, Any] | None = None
+        for current_message in bedrock_messages:
+            if (
+                last_message is not None
+                and current_message['role'] == last_message['role']
+                and current_message['role'] == 'user'
+            ):
+                # Add the new user content onto the existing user message.
+                last_content = list(last_message['content'])
+                last_content.extend(current_message['content'])
+                last_message['content'] = last_content
+                continue
+
+            # Add the entire message to the list of messages.
+            processed_messages.append(current_message)
+            last_message = cast(dict[str, Any], current_message)

         if instructions := self._get_instructions(messages):
             system_prompt.insert(0, {'text': instructions})

-        return system_prompt, bedrock_messages
+        return system_prompt, processed_messages

     @staticmethod
     async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int]) -> list[MessageUnionTypeDef]:
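The merging step added to `_map_messages` folds consecutive `user` messages (for example several tool results) into one, since Bedrock expects alternating roles. A standalone sketch with hand-written message dicts standing in for the real `MessageUnionTypeDef` values:

    from __future__ import annotations

    from typing import Any

    bedrock_messages: list[dict[str, Any]] = [
        {'role': 'user', 'content': [{'text': 'What is the capital of France?'}]},
        {'role': 'user', 'content': [{'toolResult': {'toolUseId': 't1', 'content': [{'text': 'Paris'}]}}]},
        {'role': 'assistant', 'content': [{'text': 'Paris'}]},
    ]

    processed: list[dict[str, Any]] = []
    last_message: dict[str, Any] | None = None
    for current in bedrock_messages:
        if last_message is not None and current['role'] == last_message['role'] == 'user':
            # fold this user message's content into the previous user message
            last_message['content'] = list(last_message['content']) + list(current['content'])
            continue
        processed.append(current)
        last_message = current

    assert len(processed) == 2 and len(processed[0]['content']) == 2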
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/cohere.py

@@ -133,10 +133,12 @@ class CohereModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         check_allow_model_requests()
         response = await self._chat(messages, cast(CohereModelSettings, model_settings or {}), model_request_parameters)
-        return self._process_response(response), _map_usage(response)
+        model_response = self._process_response(response)
+        model_response.usage.requests = 1
+        return model_response

     @property
     def model_name(self) -> CohereModelName:
@@ -191,7 +193,7 @@ class CohereModel(Model):
                     tool_call_id=c.id or _generate_tool_call_id(),
                 )
             )
-        return ModelResponse(parts=parts, model_name=self._model_name)
+        return ModelResponse(parts=parts, usage=_map_usage(response), model_name=self._model_name)

     def _map_messages(self, messages: list[ModelMessage]) -> list[ChatMessageV2]:
         """Just maps a `pydantic_ai.Message` to a `cohere.ChatMessageV2`."""
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/fallback.py

@@ -15,7 +15,6 @@ from . import KnownModelName, Model, ModelRequestParameters, StreamedResponse, infer_model
 if TYPE_CHECKING:
     from ..messages import ModelMessage, ModelResponse
     from ..settings import ModelSettings
-    from ..usage import Usage


 @dataclass(init=False)
@@ -55,7 +54,7 @@ class FallbackModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, Usage]:
+    ) -> ModelResponse:
         """Try each model in sequence until one succeeds.

         In case of failure, raise a FallbackExceptionGroup with all exceptions.
@@ -65,7 +64,7 @@ class FallbackModel(Model):
         for model in self.models:
             customized_model_request_parameters = model.customize_request_parameters(model_request_parameters)
             try:
-                response, usage = await model.request(messages, model_settings, customized_model_request_parameters)
+                response = await model.request(messages, model_settings, customized_model_request_parameters)
             except Exception as exc:
                 if self._fallback_on(exc):
                     exceptions.append(exc)
@@ -73,7 +72,7 @@ class FallbackModel(Model):
                     raise exc

             self._set_span_attributes(model)
-            return response, usage
+            return response

         raise FallbackExceptionGroup('All models from FallbackModel failed', exceptions)

{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/function.py

@@ -88,7 +88,7 @@ class FunctionModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         agent_info = AgentInfo(
             model_request_parameters.function_tools,
             model_request_parameters.allow_text_output,
@@ -105,8 +105,11 @@ class FunctionModel(Model):
         assert isinstance(response_, ModelResponse), response_
         response = response_
         response.model_name = self._model_name
-        # Add usage data
-        return response, _estimate_usage(chain(messages, [response]))
+        # Add usage data if not already present
+        if not response.usage.has_values():
+            response.usage = _estimate_usage(chain(messages, [response]))
+        response.usage.requests = 1
+        return response

     @asynccontextmanager
     async def request_stream(
@@ -273,7 +276,9 @@ def _estimate_usage(messages: Iterable[ModelMessage]) -> usage.Usage:
     else:
         assert_never(message)
     return usage.Usage(
-        request_tokens=request_tokens, response_tokens=response_tokens, total_tokens=request_tokens + response_tokens
+        request_tokens=request_tokens,
+        response_tokens=response_tokens,
+        total_tokens=request_tokens + response_tokens,
     )

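`_estimate_usage` now also reports response and total tokens, and `FunctionModel.request()` only falls back to the estimate when the function did not set usage itself (checked with the new `Usage.has_values()`). A small example of building such an estimate by hand; the token counts are invented:

    from pydantic_ai.usage import Usage

    request_tokens, response_tokens = 50, 20
    estimate = Usage(
        request_tokens=request_tokens,
        response_tokens=response_tokens,
        total_tokens=request_tokens + response_tokens,
    )
    estimate.requests = 1  # FunctionModel.request() also records the single request it made
    print(estimate.has_values())  # True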
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/gemini.py

@@ -145,14 +145,14 @@ class GeminiModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         check_allow_model_requests()
         async with self._make_request(
             messages, False, cast(GeminiModelSettings, model_settings or {}), model_request_parameters
         ) as http_response:
             data = await http_response.aread()
             response = _gemini_response_ta.validate_json(data)
-            return self._process_response(response), _metadata_as_usage(response)
+            return self._process_response(response)

     @asynccontextmanager
     async def request_stream(
@@ -269,7 +269,9 @@ class GeminiModel(Model):
         else:
             raise UnexpectedModelBehavior('Content field missing from Gemini response', str(response))
         parts = response['candidates'][0]['content']['parts']
-        return _process_response_from_parts(parts, response.get('model_version', self._model_name))
+        usage = _metadata_as_usage(response)
+        usage.requests = 1
+        return _process_response_from_parts(parts, response.get('model_version', self._model_name), usage)

     async def _process_streamed_response(self, http_response: HTTPResponse) -> StreamedResponse:
         """Process a streamed response, and prepare a streaming response to return."""
@@ -591,7 +593,7 @@ def _function_call_part_from_call(tool: ToolCallPart) -> _GeminiFunctionCallPart:


 def _process_response_from_parts(
-    parts: Sequence[_GeminiPartUnion], model_name: GeminiModelName,
+    parts: Sequence[_GeminiPartUnion], model_name: GeminiModelName, usage: usage.Usage
 ) -> ModelResponse:
     items: list[ModelResponsePart] = []
     for part in parts:
@@ -603,7 +605,7 @@ def _process_response_from_parts(
             raise UnexpectedModelBehavior(
                 f'Unsupported response from Gemini, expected all parts to be function calls or text, got: {part!r}'
             )
-    return ModelResponse(parts=items, model_name=model_name)
+    return ModelResponse(parts=items, usage=usage, model_name=model_name)


 class _GeminiFunctionCall(TypedDict):
@@ -831,6 +833,12 @@ class _GeminiJsonSchema(WalkJsonSchema):
         schema.pop('exclusiveMaximum', None)
         schema.pop('exclusiveMinimum', None)

+        # Gemini only supports string enums, so we need to convert any enum values to strings.
+        # Pydantic will take care of transforming the transformed string values to the correct type.
+        if enum := schema.get('enum'):
+            schema['type'] = 'string'
+            schema['enum'] = [str(val) for val in enum]
+
         type_ = schema.get('type')
         if 'oneOf' in schema and 'type' not in schema:  # pragma: no cover
             # This gets hit when we have a discriminated union
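The added schema tweak stringifies enum members because Gemini only accepts string enums, relying on Pydantic to coerce the values back to the declared type on validation. The same transformation applied to a bare dict:

    schema = {'type': 'integer', 'enum': [1, 2, 3]}

    if enum := schema.get('enum'):
        schema['type'] = 'string'
        schema['enum'] = [str(val) for val in enum]

    assert schema == {'type': 'string', 'enum': ['1', '2', '3']}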
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/groq.py

@@ -130,12 +130,14 @@ class GroqModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         check_allow_model_requests()
         response = await self._completions_create(
             messages, False, cast(GroqModelSettings, model_settings or {}), model_request_parameters
         )
-        return self._process_response(response), _map_usage(response)
+        model_response = self._process_response(response)
+        model_response.usage.requests = 1
+        return model_response

     @asynccontextmanager
     async def request_stream(
@@ -237,7 +239,7 @@ class GroqModel(Model):
         if choice.message.tool_calls is not None:
             for c in choice.message.tool_calls:
                 items.append(ToolCallPart(tool_name=c.function.name, args=c.function.arguments, tool_call_id=c.id))
-        return ModelResponse(items, model_name=response.model, timestamp=timestamp)
+        return ModelResponse(items, usage=_map_usage(response), model_name=response.model, timestamp=timestamp)

     async def _process_streamed_response(self, response: AsyncStream[chat.ChatCompletionChunk]) -> GroqStreamedResponse:
         """Process a streamed response, and prepare a streaming response to return."""
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/instrumented.py

@@ -23,7 +23,6 @@ from ..messages import (
     ModelResponse,
 )
 from ..settings import ModelSettings
-from ..usage import Usage
 from . import KnownModelName, Model, ModelRequestParameters, StreamedResponse
 from .wrapper import WrapperModel

@@ -122,11 +121,11 @@ class InstrumentedModel(WrapperModel):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, Usage]:
+    ) -> ModelResponse:
         with self._instrument(messages, model_settings, model_request_parameters) as finish:
-            response, usage = await super().request(messages, model_settings, model_request_parameters)
-            finish(response, usage)
-            return response, usage
+            response = await super().request(messages, model_settings, model_request_parameters)
+            finish(response)
+            return response

     @asynccontextmanager
     async def request_stream(
@@ -144,7 +143,7 @@ class InstrumentedModel(WrapperModel):
                     yield response_stream
                 finally:
                     if response_stream:
-                        finish(response_stream.get(), response_stream.usage())
+                        finish(response_stream.get())

     @contextmanager
     def _instrument(
@@ -152,7 +151,7 @@ class InstrumentedModel(WrapperModel):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> Iterator[Callable[[ModelResponse, Usage], None]]:
+    ) -> Iterator[Callable[[ModelResponse], None]]:
         operation = 'chat'
         span_name = f'{operation} {self.model_name}'
         # TODO Missing attributes:
@@ -177,7 +176,7 @@ class InstrumentedModel(WrapperModel):

         with self.settings.tracer.start_as_current_span(span_name, attributes=attributes) as span:

-            def finish(response: ModelResponse, usage: Usage):
+            def finish(response: ModelResponse):
                 if not span.is_recording():
                     return

@@ -193,7 +192,7 @@ class InstrumentedModel(WrapperModel):
                         },
                     )
                 )
-                new_attributes: dict[str, AttributeValue] = usage.opentelemetry_attributes()  # pyright: ignore[reportAssignmentType]
+                new_attributes: dict[str, AttributeValue] = response.usage.opentelemetry_attributes()  # pyright: ignore[reportAssignmentType]
                 attributes.update(getattr(span, 'attributes', {}))
                 request_model = attributes[GEN_AI_REQUEST_MODEL_ATTRIBUTE]
                 new_attributes['gen_ai.response.model'] = response.model_name or request_model
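Instrumentation now reads usage straight off the `ModelResponse` and exports it via `Usage.opentelemetry_attributes()`. A minimal example of producing that attribute dict (the exact keys are defined by the library; run it to see them):

    from pydantic_ai.usage import Usage

    u = Usage(requests=1, request_tokens=56, response_tokens=1, total_tokens=57)
    print(u.opentelemetry_attributes())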
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/mistral.py

@@ -147,13 +147,15 @@ class MistralModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         """Make a non-streaming request to the model from Pydantic AI call."""
         check_allow_model_requests()
         response = await self._completions_create(
             messages, cast(MistralModelSettings, model_settings or {}), model_request_parameters
         )
-        return self._process_response(response), _map_usage(response)
+        model_response = self._process_response(response)
+        model_response.usage.requests = 1
+        return model_response

     @asynccontextmanager
     async def request_stream(
@@ -323,7 +325,7 @@ class MistralModel(Model):
             tool = self._map_mistral_to_pydantic_tool_call(tool_call=tool_call)
             parts.append(tool)

-        return ModelResponse(parts, model_name=response.model, timestamp=timestamp)
+        return ModelResponse(parts, usage=_map_usage(response), model_name=response.model, timestamp=timestamp)

     async def _process_streamed_response(
         self,
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/openai.py

@@ -192,12 +192,14 @@ class OpenAIModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         check_allow_model_requests()
         response = await self._completions_create(
             messages, False, cast(OpenAIModelSettings, model_settings or {}), model_request_parameters
         )
-        return self._process_response(response), _map_usage(response)
+        model_response = self._process_response(response)
+        model_response.usage.requests = 1
+        return model_response

     @asynccontextmanager
     async def request_stream(
@@ -304,7 +306,7 @@ class OpenAIModel(Model):
         if choice.message.tool_calls is not None:
             for c in choice.message.tool_calls:
                 items.append(ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id))
-        return ModelResponse(items, model_name=response.model, timestamp=timestamp)
+        return ModelResponse(items, usage=_map_usage(response), model_name=response.model, timestamp=timestamp)

     async def _process_streamed_response(self, response: AsyncStream[ChatCompletionChunk]) -> OpenAIStreamedResponse:
         """Process a streamed response, and prepare a streaming response to return."""
@@ -522,12 +524,12 @@ class OpenAIResponsesModel(Model):
         messages: list[ModelRequest | ModelResponse],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         check_allow_model_requests()
         response = await self._responses_create(
             messages, False, cast(OpenAIResponsesModelSettings, model_settings or {}), model_request_parameters
         )
-        return self._process_response(response), _map_usage(response)
+        return self._process_response(response)

     @asynccontextmanager
     async def request_stream(
@@ -554,7 +556,7 @@ class OpenAIResponsesModel(Model):
         for item in response.output:
             if item.type == 'function_call':
                 items.append(ToolCallPart(item.name, item.arguments, tool_call_id=item.call_id))
-        return ModelResponse(items, model_name=response.model, timestamp=timestamp)
+        return ModelResponse(items, usage=_map_usage(response), model_name=response.model, timestamp=timestamp)

     async def _process_streamed_response(
         self, response: AsyncStream[responses.ResponseStreamEvent]
@@ -935,6 +937,7 @@ def _map_usage(response: chat.ChatCompletion | ChatCompletionChunk | responses.Response) -> usage.Usage:
     if response_usage.prompt_tokens_details is not None:
         details.update(response_usage.prompt_tokens_details.model_dump(exclude_none=True))
     return usage.Usage(
+        requests=1,
         request_tokens=response_usage.prompt_tokens,
         response_tokens=response_usage.completion_tokens,
         total_tokens=response_usage.total_tokens,
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/test.py

@@ -86,11 +86,12 @@ class TestModel(Model):
         messages: list[ModelMessage],
         model_settings: ModelSettings | None,
         model_request_parameters: ModelRequestParameters,
-    ) -> tuple[ModelResponse, usage.Usage]:
+    ) -> ModelResponse:
         self.last_model_request_parameters = model_request_parameters
         model_response = self._request(messages, model_settings, model_request_parameters)
-        usage = _estimate_usage([*messages, model_response])
-        return model_response, usage
+        model_response.usage = _estimate_usage([*messages, model_response])
+        model_response.usage.requests = 1
+        return model_response

     @asynccontextmanager
     async def request_stream(
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/models/wrapper.py

@@ -7,7 +7,6 @@ from typing import Any

 from ..messages import ModelMessage, ModelResponse
 from ..settings import ModelSettings
-from ..usage import Usage
 from . import KnownModelName, Model, ModelRequestParameters, StreamedResponse, infer_model


@@ -24,7 +23,7 @@ class WrapperModel(Model):
     def __init__(self, wrapped: Model | KnownModelName):
         self.wrapped = infer_model(wrapped)

-    async def request(self, *args: Any, **kwargs: Any) -> tuple[ModelResponse, Usage]:
+    async def request(self, *args: Any, **kwargs: Any) -> ModelResponse:
         return await self.wrapped.request(*args, **kwargs)

     @asynccontextmanager
{pydantic_ai_slim-0.1.11 → pydantic_ai_slim-0.2.0}/pydantic_ai/usage.py

@@ -28,14 +28,12 @@ class Usage:
     details: dict[str, int] | None = None
     """Any extra details returned by the model."""

-    def incr(self, incr_usage: Usage, requests: int = 0) -> None:
+    def incr(self, incr_usage: Usage) -> None:
         """Increment the usage in place.

         Args:
             incr_usage: The usage to increment by.
-            requests: The number of requests to increment by in addition to `incr_usage.requests`.
         """
-        self.requests += requests
         for f in 'requests', 'request_tokens', 'response_tokens', 'total_tokens':
             self_value = getattr(self, f)
             other_value = getattr(incr_usage, f)
@@ -66,6 +64,10 @@ class Usage:
                 result[f'gen_ai.usage.details.{key}'] = value
         return {k: v for k, v in result.items() if v}

+    def has_values(self) -> bool:
+        """Whether any values are set and non-zero."""
+        return bool(self.requests or self.request_tokens or self.response_tokens or self.details)
+

 @dataclass
 class UsageLimits:

All remaining files in the listing above are unchanged between 0.1.11 and 0.2.0.