pydantic-ai-slim 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of pydantic-ai-slim was flagged by the registry; see its page for details.

@@ -13,7 +13,6 @@ from typing_extensions import assert_never
 from .. import UnexpectedModelBehavior, _utils, result
 from .._utils import guard_tool_call_id as _guard_tool_call_id
 from ..messages import (
-    ArgsJson,
     ModelMessage,
     ModelRequest,
     ModelResponse,
@@ -25,7 +24,7 @@ from ..messages import (
     ToolReturnPart,
     UserPromptPart,
 )
-from ..result import Cost
+from ..result import Usage
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
 from . import (
@@ -147,9 +146,9 @@ class OpenAIAgentModel(AgentModel):

     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, result.Cost]:
+    ) -> tuple[ModelResponse, result.Usage]:
         response = await self._completions_create(messages, False, model_settings)
-        return self._process_response(response), _map_cost(response)
+        return self._process_response(response), _map_usage(response)

     @asynccontextmanager
     async def request_stream(
@@ -211,14 +210,14 @@ class OpenAIAgentModel(AgentModel):
             items.append(TextPart(choice.message.content))
         if choice.message.tool_calls is not None:
             for c in choice.message.tool_calls:
-                items.append(ToolCallPart.from_json(c.function.name, c.function.arguments, c.id))
+                items.append(ToolCallPart.from_raw_args(c.function.name, c.function.arguments, c.id))
         return ModelResponse(items, timestamp=timestamp)

     @staticmethod
     async def _process_streamed_response(response: AsyncStream[ChatCompletionChunk]) -> EitherStreamedResponse:
         """Process a streamed response, and prepare a streaming response to return."""
         timestamp: datetime | None = None
-        start_cost = Cost()
+        start_usage = Usage()
         # the first chunk may contain enough information so we iterate until we get either `tool_calls` or `content`
         while True:
             try:
@@ -227,19 +226,19 @@ class OpenAIAgentModel(AgentModel):
                 raise UnexpectedModelBehavior('Streamed response ended without content or tool calls') from e

             timestamp = timestamp or datetime.fromtimestamp(chunk.created, tz=timezone.utc)
-            start_cost += _map_cost(chunk)
+            start_usage += _map_usage(chunk)

             if chunk.choices:
                 delta = chunk.choices[0].delta

                 if delta.content is not None:
-                    return OpenAIStreamTextResponse(delta.content, response, timestamp, start_cost)
+                    return OpenAIStreamTextResponse(delta.content, response, timestamp, start_usage)
                 elif delta.tool_calls is not None:
                     return OpenAIStreamStructuredResponse(
                         response,
                         {c.index: c for c in delta.tool_calls},
                         timestamp,
-                        start_cost,
+                        start_usage,
                     )
             # else continue until we get either delta.content or delta.tool_calls

@@ -302,7 +301,7 @@ class OpenAIStreamTextResponse(StreamTextResponse):
     _first: str | None
     _response: AsyncStream[ChatCompletionChunk]
     _timestamp: datetime
-    _cost: result.Cost
+    _usage: result.Usage
     _buffer: list[str] = field(default_factory=list, init=False)

     async def __anext__(self) -> None:
@@ -312,7 +311,7 @@ class OpenAIStreamTextResponse(StreamTextResponse):
             return None

         chunk = await self._response.__anext__()
-        self._cost += _map_cost(chunk)
+        self._usage += _map_usage(chunk)
         try:
             choice = chunk.choices[0]
         except IndexError:
@@ -328,8 +327,8 @@ class OpenAIStreamTextResponse(StreamTextResponse):
         yield from self._buffer
         self._buffer.clear()

-    def cost(self) -> Cost:
-        return self._cost
+    def usage(self) -> Usage:
+        return self._usage

     def timestamp(self) -> datetime:
         return self._timestamp
@@ -342,11 +341,11 @@ class OpenAIStreamStructuredResponse(StreamStructuredResponse):
     _response: AsyncStream[ChatCompletionChunk]
     _delta_tool_calls: dict[int, ChoiceDeltaToolCall]
     _timestamp: datetime
-    _cost: result.Cost
+    _usage: result.Usage

     async def __anext__(self) -> None:
         chunk = await self._response.__anext__()
-        self._cost += _map_cost(chunk)
+        self._usage += _map_usage(chunk)
         try:
             choice = chunk.choices[0]
         except IndexError:
@@ -372,37 +371,36 @@ class OpenAIStreamStructuredResponse(StreamStructuredResponse):
         for c in self._delta_tool_calls.values():
             if f := c.function:
                 if f.name is not None and f.arguments is not None:
-                    items.append(ToolCallPart.from_json(f.name, f.arguments, c.id))
+                    items.append(ToolCallPart.from_raw_args(f.name, f.arguments, c.id))

         return ModelResponse(items, timestamp=self._timestamp)

-    def cost(self) -> Cost:
-        return self._cost
+    def usage(self) -> Usage:
+        return self._usage

     def timestamp(self) -> datetime:
         return self._timestamp


 def _map_tool_call(t: ToolCallPart) -> chat.ChatCompletionMessageToolCallParam:
-    assert isinstance(t.args, ArgsJson), f'Expected ArgsJson, got {t.args}'
     return chat.ChatCompletionMessageToolCallParam(
         id=_guard_tool_call_id(t=t, model_source='OpenAI'),
         type='function',
-        function={'name': t.tool_name, 'arguments': t.args.args_json},
+        function={'name': t.tool_name, 'arguments': t.args_as_json_str()},
     )


-def _map_cost(response: chat.ChatCompletion | ChatCompletionChunk) -> result.Cost:
+def _map_usage(response: chat.ChatCompletion | ChatCompletionChunk) -> result.Usage:
     usage = response.usage
     if usage is None:
-        return result.Cost()
+        return result.Usage()
     else:
         details: dict[str, int] = {}
         if usage.completion_tokens_details is not None:
             details.update(usage.completion_tokens_details.model_dump(exclude_none=True))
         if usage.prompt_tokens_details is not None:
             details.update(usage.prompt_tokens_details.model_dump(exclude_none=True))
-        return result.Cost(
+        return result.Usage(
             request_tokens=usage.prompt_tokens,
             response_tokens=usage.completion_tokens,
             total_tokens=usage.total_tokens,
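
The hunks above rename the OpenAI adapter's cost tracking to usage and replace the ArgsJson-specific tool-call constructor with ToolCallPart.from_raw_args. A minimal sketch of the new constructor, inferred only from the call sites visible in this diff (the tool name, arguments and call id below are made up):

from pydantic_ai.messages import ToolCallPart

# from_raw_args appears to accept either a raw JSON string (as the OpenAI adapter passes)
# or a plain dict (as the test model passes), plus an optional tool call id.
part = ToolCallPart.from_raw_args('get_weather', '{"city": "Paris"}', 'call_123')
print(part.args_as_json_str())  # '{"city": "Paris"}'
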
@@ -21,7 +21,7 @@ from ..messages import (
     ToolCallPart,
     ToolReturnPart,
 )
-from ..result import Cost
+from ..result import Usage
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
 from . import (
@@ -31,6 +31,7 @@ from . import (
     StreamStructuredResponse,
     StreamTextResponse,
 )
+from .function import _estimate_string_usage, _estimate_usage  # pyright: ignore[reportPrivateUsage]


 @dataclass
@@ -131,15 +132,17 @@ class TestAgentModel(AgentModel):

     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, Cost]:
-        return self._request(messages, model_settings), Cost()
+    ) -> tuple[ModelResponse, Usage]:
+        model_response = self._request(messages, model_settings)
+        usage = _estimate_usage([*messages, model_response])
+        return model_response, usage

     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> AsyncIterator[EitherStreamedResponse]:
         msg = self._request(messages, model_settings)
-        cost = Cost()
+        usage = _estimate_usage(messages)

         # TODO: Rework this once we make StreamTextResponse more general
         texts: list[str] = []
@@ -153,9 +156,9 @@ class TestAgentModel(AgentModel):
                 assert_never(item)

         if texts:
-            yield TestStreamTextResponse('\n\n'.join(texts), cost)
+            yield TestStreamTextResponse('\n\n'.join(texts), usage)
         else:
-            yield TestStreamStructuredResponse(msg, cost)
+            yield TestStreamStructuredResponse(msg, usage)

     def gen_tool_args(self, tool_def: ToolDefinition) -> Any:
         return _JsonSchemaTestData(tool_def.parameters_json_schema, self.seed).generate()
@@ -164,7 +167,7 @@ class TestAgentModel(AgentModel):
         # if there are tools, the first thing we want to do is call all of them
         if self.tool_calls and not any(isinstance(m, ModelResponse) for m in messages):
             return ModelResponse(
-                parts=[ToolCallPart.from_dict(name, self.gen_tool_args(args)) for name, args in self.tool_calls]
+                parts=[ToolCallPart.from_raw_args(name, self.gen_tool_args(args)) for name, args in self.tool_calls]
             )

         if messages:
@@ -176,7 +179,7 @@ class TestAgentModel(AgentModel):
             if new_retry_names:
                 return ModelResponse(
                     parts=[
-                        ToolCallPart.from_dict(name, self.gen_tool_args(args))
+                        ToolCallPart.from_raw_args(name, self.gen_tool_args(args))
                         for name, args in self.tool_calls
                         if name in new_retry_names
                     ]
@@ -202,10 +205,10 @@ class TestAgentModel(AgentModel):
             custom_result_args = self.result.right
             result_tool = self.result_tools[self.seed % len(self.result_tools)]
             if custom_result_args is not None:
-                return ModelResponse(parts=[ToolCallPart.from_dict(result_tool.name, custom_result_args)])
+                return ModelResponse(parts=[ToolCallPart.from_raw_args(result_tool.name, custom_result_args)])
             else:
                 response_args = self.gen_tool_args(result_tool)
-                return ModelResponse(parts=[ToolCallPart.from_dict(result_tool.name, response_args)])
+                return ModelResponse(parts=[ToolCallPart.from_raw_args(result_tool.name, response_args)])


 @dataclass
@@ -213,7 +216,7 @@ class TestStreamTextResponse(StreamTextResponse):
     """A text response that streams test data."""

     _text: str
-    _cost: Cost
+    _usage: Usage
     _iter: Iterator[str] = field(init=False)
     _timestamp: datetime = field(default_factory=_utils.now_utc)
     _buffer: list[str] = field(default_factory=list, init=False)
@@ -228,14 +231,17 @@ class TestStreamTextResponse(StreamTextResponse):
         self._iter = iter(words)

     async def __anext__(self) -> None:
-        self._buffer.append(_utils.sync_anext(self._iter))
+        next_str = _utils.sync_anext(self._iter)
+        response_tokens = _estimate_string_usage(next_str)
+        self._usage += Usage(response_tokens=response_tokens, total_tokens=response_tokens)
+        self._buffer.append(next_str)

     def get(self, *, final: bool = False) -> Iterable[str]:
         yield from self._buffer
         self._buffer.clear()

-    def cost(self) -> Cost:
-        return self._cost
+    def usage(self) -> Usage:
+        return self._usage

     def timestamp(self) -> datetime:
         return self._timestamp
@@ -246,7 +252,7 @@ class TestStreamStructuredResponse(StreamStructuredResponse):
     """A structured response that streams test data."""

     _structured_response: ModelResponse
-    _cost: Cost
+    _usage: Usage
     _iter: Iterator[None] = field(default_factory=lambda: iter([None]))
     _timestamp: datetime = field(default_factory=_utils.now_utc, init=False)

@@ -256,8 +262,8 @@ class TestStreamStructuredResponse(StreamStructuredResponse):
     def get(self, *, final: bool = False) -> ModelResponse:
         return self._structured_response

-    def cost(self) -> Cost:
-        return self._cost
+    def usage(self) -> Usage:
+        return self._usage

     def timestamp(self) -> datetime:
         return self._timestamp
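
The hunks above are from the package's test model (TestAgentModel): instead of returning an empty Cost(), it now estimates token usage via the private _estimate_usage/_estimate_string_usage helpers. A rough sketch of the visible effect, assuming the package's public TestModel and Agent APIs (which are not part of this diff):

from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel

agent = Agent(TestModel())
result = agent.run_sync('hello')
# Illustrative only: the actual numbers depend on the estimation helpers.
print(result.usage())  # e.g. Usage(requests=1, request_tokens=..., response_tokens=..., total_tokens=...)
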
pydantic_ai/result.py CHANGED
@@ -9,11 +9,12 @@ from typing import Generic, TypeVar, cast
 import logfire_api

 from . import _result, _utils, exceptions, messages as _messages, models
-from .tools import AgentDeps
+from .settings import UsageLimits
+from .tools import AgentDeps, RunContext

 __all__ = (
     'ResultData',
-    'Cost',
+    'Usage',
     'RunResult',
     'StreamedRunResult',
 )
@@ -26,30 +27,32 @@ _logfire = logfire_api.Logfire(otel_scope='pydantic-ai')


 @dataclass
-class Cost:
-    """Cost of a request or run.
+class Usage:
+    """LLM usage associated to a request or run.

-    Responsibility for calculating costs is on the model used, PydanticAI simply sums the cost of requests.
+    Responsibility for calculating usage is on the model; PydanticAI simply sums the usage information across requests.

-    You'll need to look up the documentation of the model you're using to convent "token count" costs to monetary costs.
+    You'll need to look up the documentation of the model you're using to convert usage to monetary costs.
     """

+    requests: int = 0
+    """Number of requests made."""
     request_tokens: int | None = None
-    """Tokens used in processing the request."""
+    """Tokens used in processing requests."""
     response_tokens: int | None = None
-    """Tokens used in generating the response."""
+    """Tokens used in generating responses."""
     total_tokens: int | None = None
     """Total tokens used in the whole run, should generally be equal to `request_tokens + response_tokens`."""
     details: dict[str, int] | None = None
     """Any extra details returned by the model."""

-    def __add__(self, other: Cost) -> Cost:
-        """Add two costs together.
+    def __add__(self, other: Usage) -> Usage:
+        """Add two Usages together.

-        This is provided so it's trivial to sum costs from multiple requests and runs.
+        This is provided so it's trivial to sum usage information from multiple requests and runs.
         """
         counts: dict[str, int] = {}
-        for f in 'request_tokens', 'response_tokens', 'total_tokens':
+        for f in 'requests', 'request_tokens', 'response_tokens', 'total_tokens':
             self_value = getattr(self, f)
             other_value = getattr(other, f)
             if self_value is not None or other_value is not None:
@@ -61,7 +64,7 @@ class Cost:
             for key, value in other.details.items():
                 details[key] = details.get(key, 0) + value

-        return Cost(**counts, details=details or None)
+        return Usage(**counts, details=details or None)


 @dataclass
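
A quick illustration of the renamed class: __add__ now also sums the new requests counter alongside the token counts (module path taken from the file header above; the numbers are arbitrary):

from pydantic_ai.result import Usage

total = Usage(requests=1, request_tokens=50, response_tokens=12, total_tokens=62) + Usage(
    requests=1, request_tokens=70, response_tokens=8, total_tokens=78
)
print(total)  # Usage(requests=2, request_tokens=120, response_tokens=20, total_tokens=140, details=None)
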
@@ -95,7 +98,7 @@ class _BaseRunResult(ABC, Generic[ResultData]):
         return _messages.ModelMessagesTypeAdapter.dump_json(self.new_messages())

     @abstractmethod
-    def cost(self) -> Cost:
+    def usage(self) -> Usage:
         raise NotImplementedError()


@@ -105,22 +108,23 @@ class RunResult(_BaseRunResult[ResultData]):

     data: ResultData
     """Data from the final response in the run."""
-    _cost: Cost
+    _usage: Usage

-    def cost(self) -> Cost:
-        """Return the cost of the whole run."""
-        return self._cost
+    def usage(self) -> Usage:
+        """Return the usage of the whole run."""
+        return self._usage


 @dataclass
 class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
     """Result of a streamed run that returns structured data via a tool call."""

-    cost_so_far: Cost
-    """Cost of the run up until the last request."""
+    usage_so_far: Usage
+    """Usage of the run up until the last request."""
+    _usage_limits: UsageLimits | None
     _stream_response: models.EitherStreamedResponse
     _result_schema: _result.ResultSchema[ResultData] | None
-    _deps: AgentDeps
+    _run_ctx: RunContext[AgentDeps]
     _result_validators: list[_result.ResultValidator[AgentDeps, ResultData]]
     _result_tool_name: str | None
     _on_complete: Callable[[], Awaitable[None]]
@@ -173,11 +177,15 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
         Debouncing is particularly important for long structured responses to reduce the overhead of
         performing validation as each token is received.
         """
+        usage_checking_stream = _get_usage_checking_stream_response(
+            self._stream_response, self._usage_limits, self.usage
+        )
+
         with _logfire.span('response stream text') as lf_span:
             if isinstance(self._stream_response, models.StreamStructuredResponse):
                 raise exceptions.UserError('stream_text() can only be used with text responses')
             if delta:
-                async with _utils.group_by_temporal(self._stream_response, debounce_by) as group_iter:
+                async with _utils.group_by_temporal(usage_checking_stream, debounce_by) as group_iter:
                     async for _ in group_iter:
                         yield ''.join(self._stream_response.get())
                 final_delta = ''.join(self._stream_response.get(final=True))
@@ -188,7 +196,7 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
                 # yielding at each step
                 chunks: list[str] = []
                 combined = ''
-                async with _utils.group_by_temporal(self._stream_response, debounce_by) as group_iter:
+                async with _utils.group_by_temporal(usage_checking_stream, debounce_by) as group_iter:
                     async for _ in group_iter:
                         new = False
                         for chunk in self._stream_response.get():
@@ -225,6 +233,10 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
         Returns:
             An async iterable of the structured response message and whether that is the last message.
         """
+        usage_checking_stream = _get_usage_checking_stream_response(
+            self._stream_response, self._usage_limits, self.usage
+        )
+
         with _logfire.span('response stream structured') as lf_span:
             if isinstance(self._stream_response, models.StreamTextResponse):
                 raise exceptions.UserError('stream_structured() can only be used with structured responses')
@@ -235,7 +247,7 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
                 if isinstance(item, _messages.ToolCallPart) and item.has_content():
                     yield msg, False
                     break
-            async with _utils.group_by_temporal(self._stream_response, debounce_by) as group_iter:
+            async with _utils.group_by_temporal(usage_checking_stream, debounce_by) as group_iter:
                 async for _ in group_iter:
                     msg = self._stream_response.get()
                     for item in msg.parts:
@@ -249,8 +261,13 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):

     async def get_data(self) -> ResultData:
         """Stream the whole response, validate and return it."""
-        async for _ in self._stream_response:
+        usage_checking_stream = _get_usage_checking_stream_response(
+            self._stream_response, self._usage_limits, self.usage
+        )
+
+        async for _ in usage_checking_stream:
             pass
+
         if isinstance(self._stream_response, models.StreamTextResponse):
             text = ''.join(self._stream_response.get(final=True))
             text = await self._validate_text_result(text)
@@ -266,13 +283,13 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
         """Return whether the stream response contains structured data (as opposed to text)."""
         return isinstance(self._stream_response, models.StreamStructuredResponse)

-    def cost(self) -> Cost:
-        """Return the cost of the whole run.
+    def usage(self) -> Usage:
+        """Return the usage of the whole run.

         !!! note
-            This won't return the full cost until the stream is finished.
+            This won't return the full usage until the stream is finished.
         """
-        return self.cost_so_far + self._stream_response.cost()
+        return self.usage_so_far + self._stream_response.usage()

     def timestamp(self) -> datetime:
         """Get the timestamp of the response."""
@@ -294,17 +311,15 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
             result_data = result_tool.validate(call, allow_partial=allow_partial, wrap_validation_errors=False)

             for validator in self._result_validators:
-                result_data = await validator.validate(result_data, self._deps, 0, call, self._all_messages)
+                result_data = await validator.validate(result_data, call, self._run_ctx)
             return result_data

     async def _validate_text_result(self, text: str) -> str:
         for validator in self._result_validators:
             text = await validator.validate(  # pyright: ignore[reportAssignmentType]
                 text,  # pyright: ignore[reportArgumentType]
-                self._deps,
-                0,
                 None,
-                self._all_messages,
+                self._run_ctx,
             )
         return text

@@ -312,3 +327,18 @@ class StreamedRunResult(_BaseRunResult[ResultData], Generic[AgentDeps, ResultData]):
         self.is_complete = True
         self._all_messages.append(message)
         await self._on_complete()
+
+
+def _get_usage_checking_stream_response(
+    stream_response: AsyncIterator[ResultData], limits: UsageLimits | None, get_usage: Callable[[], Usage]
+) -> AsyncIterator[ResultData]:
+    if limits is not None and limits.has_token_limits():
+
+        async def _usage_checking_iterator():
+            async for item in stream_response:
+                limits.check_tokens(get_usage())
+                yield item
+
+        return _usage_checking_iterator()
+    else:
+        return stream_response
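
The streaming methods now wrap the underlying response in _get_usage_checking_stream_response, so token limits are re-checked as each chunk arrives. A sketch of what this looks like from the caller's side, assuming the Agent.run_stream API (which is not shown in this diff):

from pydantic_ai import Agent

agent = Agent('openai:gpt-4o')

async def main():
    async with agent.run_stream('Tell me a short joke.') as result:
        async for text in result.stream_text():
            print(text)
        # Per the note in usage(): the total is only complete once the stream has finished.
        print(result.usage())
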
pydantic_ai/settings.py CHANGED
@@ -1,8 +1,16 @@
 from __future__ import annotations

+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
 from httpx import Timeout
 from typing_extensions import TypedDict

+from .exceptions import UsageLimitExceeded
+
+if TYPE_CHECKING:
+    from .result import Usage
+

 class ModelSettings(TypedDict, total=False):
     """Settings to configure an LLM.
@@ -70,3 +78,60 @@ def merge_model_settings(base: ModelSettings | None, overrides: ModelSettings |
         return base | overrides
     else:
         return base or overrides
+
+
+@dataclass
+class UsageLimits:
+    """Limits on model usage.
+
+    The request count is tracked by pydantic_ai, and the request limit is checked before each request to the model.
+    Token counts are provided in responses from the model, and the token limits are checked after each response.
+
+    Each of the limits can be set to `None` to disable that limit.
+    """
+
+    request_limit: int | None = 50
+    """The maximum number of requests allowed to the model."""
+    request_tokens_limit: int | None = None
+    """The maximum number of tokens allowed in requests to the model."""
+    response_tokens_limit: int | None = None
+    """The maximum number of tokens allowed in responses from the model."""
+    total_tokens_limit: int | None = None
+    """The maximum number of tokens allowed in requests and responses combined."""
+
+    def has_token_limits(self) -> bool:
+        """Returns `True` if this instance places any limits on token counts.
+
+        If this returns `False`, the `check_tokens` method will never raise an error.
+
+        This is useful because if we have token limits, we need to check them after receiving each streamed message.
+        If there are no limits, we can skip that processing in the streaming response iterator.
+        """
+        return any(
+            limit is not None
+            for limit in (self.request_tokens_limit, self.response_tokens_limit, self.total_tokens_limit)
+        )
+
+    def check_before_request(self, usage: Usage) -> None:
+        """Raises a `UsageLimitExceeded` exception if the next request would exceed the request_limit."""
+        request_limit = self.request_limit
+        if request_limit is not None and usage.requests >= request_limit:
+            raise UsageLimitExceeded(f'The next request would exceed the request_limit of {request_limit}')
+
+    def check_tokens(self, usage: Usage) -> None:
+        """Raises a `UsageLimitExceeded` exception if the usage exceeds any of the token limits."""
+        request_tokens = usage.request_tokens or 0
+        if self.request_tokens_limit is not None and request_tokens > self.request_tokens_limit:
+            raise UsageLimitExceeded(
+                f'Exceeded the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})'
+            )
+
+        response_tokens = usage.response_tokens or 0
+        if self.response_tokens_limit is not None and response_tokens > self.response_tokens_limit:
+            raise UsageLimitExceeded(
+                f'Exceeded the response_tokens_limit of {self.response_tokens_limit} ({response_tokens=})'
+            )
+
+        total_tokens = request_tokens + response_tokens
+        if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit:
+            raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})')
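
A small sketch of how the new UsageLimits and Usage classes fit together, using only the APIs added in this diff (how a UsageLimits instance is threaded into an agent run is not shown here):

from pydantic_ai.result import Usage
from pydantic_ai.settings import UsageLimits

limits = UsageLimits(response_tokens_limit=100)

limits.check_tokens(Usage(request_tokens=40, response_tokens=60, total_tokens=100))  # within limits
limits.check_tokens(Usage(request_tokens=40, response_tokens=160, total_tokens=200))  # raises UsageLimitExceeded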