pydantic-ai-slim 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydantic_ai/_agent_graph.py +60 -57
- pydantic_ai/_cli.py +18 -3
- pydantic_ai/_parts_manager.py +5 -4
- pydantic_ai/_run_context.py +2 -2
- pydantic_ai/_tool_manager.py +50 -29
- pydantic_ai/ag_ui.py +4 -4
- pydantic_ai/agent/__init__.py +69 -84
- pydantic_ai/agent/abstract.py +16 -18
- pydantic_ai/agent/wrapper.py +4 -6
- pydantic_ai/direct.py +4 -4
- pydantic_ai/durable_exec/temporal/_agent.py +13 -15
- pydantic_ai/durable_exec/temporal/_model.py +2 -2
- pydantic_ai/messages.py +16 -6
- pydantic_ai/models/__init__.py +5 -5
- pydantic_ai/models/anthropic.py +47 -46
- pydantic_ai/models/bedrock.py +25 -27
- pydantic_ai/models/cohere.py +20 -25
- pydantic_ai/models/fallback.py +15 -15
- pydantic_ai/models/function.py +7 -9
- pydantic_ai/models/gemini.py +43 -39
- pydantic_ai/models/google.py +59 -40
- pydantic_ai/models/groq.py +23 -19
- pydantic_ai/models/huggingface.py +27 -23
- pydantic_ai/models/instrumented.py +4 -4
- pydantic_ai/models/mcp_sampling.py +1 -2
- pydantic_ai/models/mistral.py +24 -22
- pydantic_ai/models/openai.py +101 -45
- pydantic_ai/models/test.py +4 -5
- pydantic_ai/profiles/__init__.py +10 -1
- pydantic_ai/profiles/deepseek.py +1 -1
- pydantic_ai/profiles/moonshotai.py +1 -1
- pydantic_ai/profiles/openai.py +13 -3
- pydantic_ai/profiles/qwen.py +4 -1
- pydantic_ai/providers/__init__.py +4 -0
- pydantic_ai/providers/huggingface.py +27 -0
- pydantic_ai/providers/ollama.py +105 -0
- pydantic_ai/providers/openai.py +1 -1
- pydantic_ai/providers/openrouter.py +2 -0
- pydantic_ai/result.py +6 -6
- pydantic_ai/run.py +4 -11
- pydantic_ai/tools.py +9 -9
- pydantic_ai/usage.py +229 -67
- {pydantic_ai_slim-0.7.1.dist-info → pydantic_ai_slim-0.7.3.dist-info}/METADATA +10 -4
- {pydantic_ai_slim-0.7.1.dist-info → pydantic_ai_slim-0.7.3.dist-info}/RECORD +47 -46
- {pydantic_ai_slim-0.7.1.dist-info → pydantic_ai_slim-0.7.3.dist-info}/WHEEL +0 -0
- {pydantic_ai_slim-0.7.1.dist-info → pydantic_ai_slim-0.7.3.dist-info}/entry_points.txt +0 -0
- {pydantic_ai_slim-0.7.1.dist-info → pydantic_ai_slim-0.7.3.dist-info}/licenses/LICENSE +0 -0
pydantic_ai/providers/openrouter.py
CHANGED

@@ -17,6 +17,7 @@ from pydantic_ai.profiles.google import google_model_profile
 from pydantic_ai.profiles.grok import grok_model_profile
 from pydantic_ai.profiles.meta import meta_model_profile
 from pydantic_ai.profiles.mistral import mistral_model_profile
+from pydantic_ai.profiles.moonshotai import moonshotai_model_profile
 from pydantic_ai.profiles.openai import OpenAIJsonSchemaTransformer, OpenAIModelProfile, openai_model_profile
 from pydantic_ai.profiles.qwen import qwen_model_profile
 from pydantic_ai.providers import Provider
@@ -57,6 +58,7 @@ class OpenRouterProvider(Provider[AsyncOpenAI]):
             'amazon': amazon_model_profile,
             'deepseek': deepseek_model_profile,
             'meta-llama': meta_model_profile,
+            'moonshotai': moonshotai_model_profile,
         }

         profile = None
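In practice this means OpenRouter model names under the `moonshotai` vendor prefix now resolve to the MoonshotAI model profile. A minimal sketch of how that surfaces in user code; the model name and API key are illustrative placeholders, not values from this diff:

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.openrouter import OpenRouterProvider

# 'moonshotai/kimi-k2' is an illustrative OpenRouter model name; the provider
# now maps the 'moonshotai' vendor prefix to moonshotai_model_profile.
model = OpenAIModel(
    'moonshotai/kimi-k2',
    provider=OpenRouterProvider(api_key='your-openrouter-api-key'),
)
agent = Agent(model)
```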
pydantic_ai/result.py
CHANGED

@@ -27,7 +27,7 @@ from .output import (
     OutputDataT,
     ToolOutput,
 )
-from .usage import Usage, UsageLimits
+from .usage import RunUsage, UsageLimits

 __all__ = (
     'OutputDataT',
@@ -52,7 +52,7 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
     _tool_manager: ToolManager[AgentDepsT]

     _agent_stream_iterator: AsyncIterator[AgentStreamEvent] | None = field(default=None, init=False)
-    _initial_run_ctx_usage: Usage = field(init=False)
+    _initial_run_ctx_usage: RunUsage = field(init=False)

     def __post_init__(self):
         self._initial_run_ctx_usage = copy(self._run_ctx.usage)
@@ -110,7 +110,7 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
         """Get the current state of the response."""
         return self._raw_stream_response.get()

-    def usage(self) -> Usage:
+    def usage(self) -> RunUsage:
         """Return the usage of the whole run.

         !!! note
@@ -196,7 +196,7 @@ class AgentStream(Generic[AgentDepsT, OutputDataT]):
                 and isinstance(event.part, _messages.TextPart)
                 and event.part.content
             ):
-                yield event.part.content, event.index
+                yield event.part.content, event.index  # pragma: no cover
             elif (  # pragma: no branch
                 isinstance(event, _messages.PartDeltaEvent)
                 and isinstance(event.delta, _messages.TextPartDelta)
@@ -382,7 +382,7 @@ class StreamedRunResult(Generic[AgentDepsT, OutputDataT]):
         await self._marked_completed(self._stream_response.get())
         return output

-    def usage(self) -> Usage:
+    def usage(self) -> RunUsage:
         """Return the usage of the whole run.

         !!! note
@@ -425,7 +425,7 @@ class FinalResult(Generic[OutputDataT]):
 def _get_usage_checking_stream_response(
     stream_response: models.StreamedResponse,
     limits: UsageLimits | None,
-    get_usage: Callable[[], Usage],
+    get_usage: Callable[[], RunUsage],
 ) -> AsyncIterator[AgentStreamEvent]:
     if limits is not None and limits.has_token_limits():
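Since `AgentStream.usage()` and `StreamedRunResult.usage()` now return `RunUsage`, callers read the renamed token fields. A hedged sketch of the streaming path, with a placeholder model and prompt:

```python
from pydantic_ai import Agent

agent = Agent('openai:gpt-4o')  # placeholder model


async def main() -> None:
    async with agent.run_stream('What is the capital of France?') as result:
        print(await result.get_output())
        usage = result.usage()  # RunUsage rather than the old Usage
        # input_tokens/output_tokens replace request_tokens/response_tokens
        print(usage.input_tokens, usage.output_tokens, usage.total_tokens)
```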
pydantic_ai/run.py
CHANGED

@@ -66,9 +66,7 @@ class AgentRun(Generic[AgentDepsT, OutputDataT]):
                 CallToolsNode(
                     model_response=ModelResponse(
                         parts=[TextPart(content='The capital of France is Paris.')],
-                        usage=Usage(
-                            requests=1, request_tokens=56, response_tokens=7, total_tokens=63
-                        ),
+                        usage=RequestUsage(input_tokens=56, output_tokens=7),
                         model_name='gpt-4o',
                         timestamp=datetime.datetime(...),
                     )
@@ -203,12 +201,7 @@
                 CallToolsNode(
                     model_response=ModelResponse(
                         parts=[TextPart(content='The capital of France is Paris.')],
-                        usage=Usage(
-                            requests=1,
-                            request_tokens=56,
-                            response_tokens=7,
-                            total_tokens=63,
-                        ),
+                        usage=RequestUsage(input_tokens=56, output_tokens=7),
                         model_name='gpt-4o',
                         timestamp=datetime.datetime(...),
                     )
@@ -235,7 +228,7 @@
         assert isinstance(next_node, End), f'Unexpected node type: {type(next_node)}'
         return next_node

-    def usage(self) -> _usage.Usage:
+    def usage(self) -> _usage.RunUsage:
         """Get usage statistics for the run so far, including token usage, model requests, and so on."""
         return self._graph_run.state.usage

@@ -352,6 +345,6 @@ class AgentRunResult(Generic[OutputDataT]):
             self.new_messages(output_tool_return_content=output_tool_return_content)
         )

-    def usage(self) -> _usage.Usage:
+    def usage(self) -> _usage.RunUsage:
         """Return the usage of the whole run."""
         return self._state.usage
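The non-streaming accessors change the same way: `AgentRun.usage()` and `AgentRunResult.usage()` return `_usage.RunUsage`, while each `ModelResponse.usage` is now a per-request `RequestUsage`, as the updated docstring examples above show. A short sketch with a placeholder model:

```python
from pydantic_ai import Agent

agent = Agent('openai:gpt-4o')  # placeholder model

result = agent.run_sync('What is the capital of France?')
run_usage = result.usage()  # RunUsage: per-request usage summed over the run
print(run_usage.requests, run_usage.input_tokens, run_usage.output_tokens)
```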
pydantic_ai/tools.py
CHANGED

@@ -31,7 +31,7 @@ __all__ = (
 ToolParams = ParamSpec('ToolParams', default=...)
 """Retrieval function param spec."""

-SystemPromptFunc = Union[
+SystemPromptFunc: TypeAlias = Union[
     Callable[[RunContext[AgentDepsT]], str],
     Callable[[RunContext[AgentDepsT]], Awaitable[str]],
     Callable[[], str],
@@ -42,17 +42,17 @@ SystemPromptFunc = Union[
 Usage `SystemPromptFunc[AgentDepsT]`.
 """

-ToolFuncContext = Callable[Concatenate[RunContext[AgentDepsT], ToolParams], Any]
+ToolFuncContext: TypeAlias = Callable[Concatenate[RunContext[AgentDepsT], ToolParams], Any]
 """A tool function that takes `RunContext` as the first argument.

 Usage `ToolContextFunc[AgentDepsT, ToolParams]`.
 """
-ToolFuncPlain = Callable[ToolParams, Any]
+ToolFuncPlain: TypeAlias = Callable[ToolParams, Any]
 """A tool function that does not take `RunContext` as the first argument.

 Usage `ToolPlainFunc[ToolParams]`.
 """
-ToolFuncEither = Union[ToolFuncContext[AgentDepsT, ToolParams], ToolFuncPlain[ToolParams]]
+ToolFuncEither: TypeAlias = Union[ToolFuncContext[AgentDepsT, ToolParams], ToolFuncPlain[ToolParams]]
 """Either kind of tool function.

 This is just a union of [`ToolFuncContext`][pydantic_ai.tools.ToolFuncContext] and
@@ -60,7 +60,7 @@ This is just a union of [`ToolFuncContext`][pydantic_ai.tools.ToolFuncContext] a

 Usage `ToolFuncEither[AgentDepsT, ToolParams]`.
 """
-ToolPrepareFunc: TypeAlias = …
+ToolPrepareFunc: TypeAlias = Callable[[RunContext[AgentDepsT], 'ToolDefinition'], Awaitable['ToolDefinition | None']]
 """Definition of a function that can prepare a tool definition at call time.

 See [tool docs](../tools.md#tool-prepare) for more information.
@@ -88,9 +88,9 @@ hitchhiker = Tool(hitchhiker, prepare=only_if_42)
 Usage `ToolPrepareFunc[AgentDepsT]`.
 """

-ToolsPrepareFunc: TypeAlias = …
+ToolsPrepareFunc: TypeAlias = Callable[
+    [RunContext[AgentDepsT], list['ToolDefinition']], Awaitable['list[ToolDefinition] | None']
+]
 """Definition of a function that can prepare the tool definition of all tools for each step.
 This is useful if you want to customize the definition of multiple tools or you want to register
 a subset of tools for a given step.
@@ -118,7 +118,7 @@ agent = Agent('openai:gpt-4o', prepare_tools=turn_on_strict_if_openai)
 Usage `ToolsPrepareFunc[AgentDepsT]`.
 """

-DocstringFormat = Literal['google', 'numpy', 'sphinx', 'auto']
+DocstringFormat: TypeAlias = Literal['google', 'numpy', 'sphinx', 'auto']
 """Supported docstring formats.

 * `'google'` — [Google-style](https://google.github.io/styleguide/pyguide.html#381-docstrings) docstrings.
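These are annotation-only changes: marking the aliases as explicit `TypeAlias`es helps type checkers without altering runtime behavior. For reference, a function satisfying the now fully spelled-out `ToolsPrepareFunc` shape might look like the following sketch; the filtering rule is invented for illustration:

```python
from __future__ import annotations

from pydantic_ai import Agent, RunContext
from pydantic_ai.tools import ToolDefinition


async def keep_safe_tools(
    ctx: RunContext[None], tool_defs: list[ToolDefinition]
) -> list[ToolDefinition] | None:
    # Illustrative rule: only expose tools whose names start with 'safe_'.
    return [td for td in tool_defs if td.name.startswith('safe_')]


agent = Agent('openai:gpt-4o', prepare_tools=keep_safe_tools)
```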
pydantic_ai/usage.py
CHANGED

@@ -1,67 +1,62 @@
 from __future__ import annotations as _annotations

+import dataclasses
 from copy import copy
-from dataclasses import dataclass
+from dataclasses import dataclass, fields
+
+from typing_extensions import deprecated, overload

 from . import _utils
 from .exceptions import UsageLimitExceeded

-__all__ = 'Usage', 'UsageLimits'
+__all__ = 'RequestUsage', 'RunUsage', 'Usage', 'UsageLimits'


 @dataclass(repr=False)
-class Usage:
-…
-    Responsibility for calculating usage is on the model; Pydantic AI simply sums the usage information across requests.
+class UsageBase:
+    input_tokens: int = 0
+    """Number of input/prompt tokens."""

-…
-    """
+    cache_write_tokens: int = 0
+    """Number of tokens written to the cache."""
+    cache_read_tokens: int = 0
+    """Number of tokens read from the cache."""

-    requests: int = 0
-    """Number of requests made to the LLM API."""
-    request_tokens: int | None = None
-    """Tokens used in processing requests."""
-    response_tokens: int | None = None
-    """Tokens used in generating responses."""
-    total_tokens: int | None = None
-    """Total tokens used in the whole run, should generally be equal to `request_tokens + response_tokens`."""
-    details: dict[str, int] | None = None
-    """Any extra details returned by the model."""
+    output_tokens: int = 0
+    """Number of output/completion tokens."""

-…
+    input_audio_tokens: int = 0
+    """Number of audio input tokens."""
+    cache_audio_read_tokens: int = 0
+    """Number of audio tokens read from the cache."""
+    output_audio_tokens: int = 0
+    """Number of audio output tokens."""

-…
-    """
-        for f in 'requests', 'request_tokens', 'response_tokens', 'total_tokens':
-            self_value = getattr(self, f)
-            other_value = getattr(incr_usage, f)
-            if self_value is not None or other_value is not None:
-                setattr(self, f, (self_value or 0) + (other_value or 0))
+    details: dict[str, int] = dataclasses.field(default_factory=dict)
+    """Any extra details returned by the model."""

-…
+    @property
+    @deprecated('`request_tokens` is deprecated, use `input_tokens` instead')
+    def request_tokens(self) -> int:
+        return self.input_tokens

-…
+    @property
+    @deprecated('`response_tokens` is deprecated, use `output_tokens` instead')
+    def response_tokens(self) -> int:
+        return self.output_tokens

-…
-        return new_usage
+    @property
+    def total_tokens(self) -> int:
+        """Sum of `input_tokens + output_tokens`."""
+        return self.input_tokens + self.output_tokens

     def opentelemetry_attributes(self) -> dict[str, int]:
-        """Get the token …
+        """Get the token usage values as OpenTelemetry attributes."""
         result: dict[str, int] = {}
-        if self.request_tokens:
-            result['gen_ai.usage.input_tokens'] = self.request_tokens
-        if self.response_tokens:
-            result['gen_ai.usage.output_tokens'] = self.response_tokens
+        if self.input_tokens:
+            result['gen_ai.usage.input_tokens'] = self.input_tokens
+        if self.output_tokens:
+            result['gen_ai.usage.output_tokens'] = self.output_tokens
         details = self.details
         if details:
             prefix = 'gen_ai.usage.details.'
@@ -71,11 +66,118 @@ class Usage:
             result[prefix + key] = value
         return result

+    def __repr__(self):
+        kv_pairs = (f'{f.name}={value!r}' for f in fields(self) if (value := getattr(self, f.name)))
+        return f'{self.__class__.__qualname__}({", ".join(kv_pairs)})'
+
     def has_values(self) -> bool:
         """Whether any values are set and non-zero."""
-        return …
+        return any(dataclasses.asdict(self).values())

-…
+
+@dataclass(repr=False)
+class RequestUsage(UsageBase):
+    """LLM usage associated with a single request.
+
+    This is an implementation of `genai_prices.types.AbstractUsage` so it can be used to calculate the price of the
+    request using [genai-prices](https://github.com/pydantic/genai-prices).
+    """
+
+    @property
+    def requests(self):
+        return 1
+
+    def incr(self, incr_usage: RequestUsage) -> None:
+        """Increment the usage in place.
+
+        Args:
+            incr_usage: The usage to increment by.
+        """
+        return _incr_usage_tokens(self, incr_usage)
+
+    def __add__(self, other: RequestUsage) -> RequestUsage:
+        """Add two RequestUsages together.
+
+        This is provided so it's trivial to sum usage information from multiple parts of a response.
+
+        **WARNING:** this CANNOT be used to sum multiple requests without breaking some pricing calculations.
+        """
+        new_usage = copy(self)
+        new_usage.incr(other)
+        return new_usage
+
+
+@dataclass(repr=False)
+class RunUsage(UsageBase):
+    """LLM usage associated with an agent run.
+
+    Responsibility for calculating request usage is on the model; Pydantic AI simply sums the usage information across requests.
+    """
+
+    requests: int = 0
+    """Number of requests made to the LLM API."""
+
+    input_tokens: int = 0
+    """Total number of text input/prompt tokens."""
+
+    cache_write_tokens: int = 0
+    """Total number of tokens written to the cache."""
+    cache_read_tokens: int = 0
+    """Total number of tokens read from the cache."""
+
+    input_audio_tokens: int = 0
+    """Total number of audio input tokens."""
+    cache_audio_read_tokens: int = 0
+    """Total number of audio tokens read from the cache."""
+
+    output_tokens: int = 0
+    """Total number of text output/completion tokens."""
+
+    details: dict[str, int] = dataclasses.field(default_factory=dict)
+    """Any extra details returned by the model."""
+
+    def incr(self, incr_usage: RunUsage | RequestUsage) -> None:
+        """Increment the usage in place.
+
+        Args:
+            incr_usage: The usage to increment by.
+        """
+        if isinstance(incr_usage, RunUsage):
+            self.requests += incr_usage.requests
+        return _incr_usage_tokens(self, incr_usage)
+
+    def __add__(self, other: RunUsage | RequestUsage) -> RunUsage:
+        """Add two RunUsages together.
+
+        This is provided so it's trivial to sum usage information from multiple runs.
+        """
+        new_usage = copy(self)
+        new_usage.incr(other)
+        return new_usage
+
+
+def _incr_usage_tokens(slf: RunUsage | RequestUsage, incr_usage: RunUsage | RequestUsage) -> None:
+    """Increment the usage in place.
+
+    Args:
+        slf: The usage to increment.
+        incr_usage: The usage to increment by.
+    """
+    slf.input_tokens += incr_usage.input_tokens
+    slf.cache_write_tokens += incr_usage.cache_write_tokens
+    slf.cache_read_tokens += incr_usage.cache_read_tokens
+    slf.input_audio_tokens += incr_usage.input_audio_tokens
+    slf.cache_audio_read_tokens += incr_usage.cache_audio_read_tokens
+    slf.output_tokens += incr_usage.output_tokens
+
+    for key, value in incr_usage.details.items():
+        slf.details[key] = slf.details.get(key, 0) + value
+
+
+@dataclass
+@deprecated('`Usage` is deprecated, use `RunUsage` instead')
+class Usage(RunUsage):
+    """Deprecated alias for `RunUsage`."""


 @dataclass(repr=False)
@@ -90,10 +192,10 @@ class UsageLimits:

     request_limit: int | None = 50
     """The maximum number of requests allowed to the model."""
-    request_tokens_limit: int | None = None
-    """The maximum number of tokens allowed …
-    response_tokens_limit: int | None = None
-    """The maximum number of tokens allowed …
+    input_tokens_limit: int | None = None
+    """The maximum number of input/prompt tokens allowed."""
+    output_tokens_limit: int | None = None
+    """The maximum number of output/response tokens allowed."""
     total_tokens_limit: int | None = None
     """The maximum number of tokens allowed in requests and responses combined."""
     count_tokens_before_request: bool = False
@@ -101,6 +203,69 @@ class UsageLimits:
     to enforce `request_tokens_limit` ahead of time. This may incur additional overhead
     (from calling the model's `count_tokens` API before making the actual request) and is disabled by default."""

+    @property
+    @deprecated('`request_tokens_limit` is deprecated, use `input_tokens_limit` instead')
+    def request_tokens_limit(self) -> int | None:
+        return self.input_tokens_limit
+
+    @property
+    @deprecated('`response_tokens_limit` is deprecated, use `output_tokens_limit` instead')
+    def response_tokens_limit(self) -> int | None:
+        return self.output_tokens_limit
+
+    @overload
+    def __init__(
+        self,
+        *,
+        request_limit: int | None = 50,
+        input_tokens_limit: int | None = None,
+        output_tokens_limit: int | None = None,
+        total_tokens_limit: int | None = None,
+        count_tokens_before_request: bool = False,
+    ) -> None:
+        self.request_limit = request_limit
+        self.input_tokens_limit = input_tokens_limit
+        self.output_tokens_limit = output_tokens_limit
+        self.total_tokens_limit = total_tokens_limit
+        self.count_tokens_before_request = count_tokens_before_request
+
+    @overload
+    @deprecated(
+        'Use `input_tokens_limit` instead of `request_tokens_limit` and `output_tokens_limit` and `total_tokens_limit`'
+    )
+    def __init__(
+        self,
+        *,
+        request_limit: int | None = 50,
+        request_tokens_limit: int | None = None,
+        response_tokens_limit: int | None = None,
+        total_tokens_limit: int | None = None,
+        count_tokens_before_request: bool = False,
+    ) -> None:
+        self.request_limit = request_limit
+        self.input_tokens_limit = request_tokens_limit
+        self.output_tokens_limit = response_tokens_limit
+        self.total_tokens_limit = total_tokens_limit
+        self.count_tokens_before_request = count_tokens_before_request
+
+    def __init__(
+        self,
+        *,
+        request_limit: int | None = 50,
+        input_tokens_limit: int | None = None,
+        output_tokens_limit: int | None = None,
+        total_tokens_limit: int | None = None,
+        count_tokens_before_request: bool = False,
+        # deprecated:
+        request_tokens_limit: int | None = None,
+        response_tokens_limit: int | None = None,
+    ):
+        self.request_limit = request_limit
+        self.input_tokens_limit = input_tokens_limit or request_tokens_limit
+        self.output_tokens_limit = output_tokens_limit or response_tokens_limit
+        self.total_tokens_limit = total_tokens_limit
+        self.count_tokens_before_request = count_tokens_before_request
+
     def has_token_limits(self) -> bool:
         """Returns `True` if this instance places any limits on token counts.

@@ -110,43 +275,40 @@ class UsageLimits:
         If there are no limits, we can skip that processing in the streaming response iterator.
         """
         return any(
-            limit is not None
-            for limit in (self.request_tokens_limit, self.response_tokens_limit, self.total_tokens_limit)
+            limit is not None for limit in (self.input_tokens_limit, self.output_tokens_limit, self.total_tokens_limit)
         )

-    def check_before_request(self, usage: Usage) -> None:
+    def check_before_request(self, usage: RunUsage) -> None:
         """Raises a `UsageLimitExceeded` exception if the next request would exceed any of the limits."""
         request_limit = self.request_limit
         if request_limit is not None and usage.requests >= request_limit:
             raise UsageLimitExceeded(f'The next request would exceed the request_limit of {request_limit}')

-…
-        if self.…
+        input_tokens = usage.input_tokens
+        if self.input_tokens_limit is not None and input_tokens > self.input_tokens_limit:
             raise UsageLimitExceeded(
-                f'The next request would exceed the …
+                f'The next request would exceed the input_tokens_limit of {self.input_tokens_limit} ({input_tokens=})'
             )

-        total_tokens = usage.total_tokens
+        total_tokens = usage.total_tokens
         if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit:
             raise UsageLimitExceeded(
                 f'The next request would exceed the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})'
             )

-    def check_tokens(self, usage: Usage) -> None:
+    def check_tokens(self, usage: RunUsage) -> None:
         """Raises a `UsageLimitExceeded` exception if the usage exceeds any of the token limits."""
-…
-        if self.…
-            raise UsageLimitExceeded(
-                f'Exceeded the request_tokens_limit of {self.request_tokens_limit} ({request_tokens=})'
-            )
+        input_tokens = usage.input_tokens
+        if self.input_tokens_limit is not None and input_tokens > self.input_tokens_limit:
+            raise UsageLimitExceeded(f'Exceeded the input_tokens_limit of {self.input_tokens_limit} ({input_tokens=})')

-…
-        if self.…
+        output_tokens = usage.output_tokens
+        if self.output_tokens_limit is not None and output_tokens > self.output_tokens_limit:
             raise UsageLimitExceeded(
-                f'Exceeded the …
+                f'Exceeded the output_tokens_limit of {self.output_tokens_limit} ({output_tokens=})'
             )

-        total_tokens = usage.total_tokens
+        total_tokens = usage.total_tokens
         if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit:
             raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})')
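Taken together, the migration is: `Usage` becomes `RunUsage`, `request_tokens`/`response_tokens` become `input_tokens`/`output_tokens` (with `total_tokens` now computed), and `UsageLimits` gains `input_tokens_limit`/`output_tokens_limit` while the old keyword spellings keep working through the deprecated overload. A sketch exercising the new API as shown in this diff:

```python
from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits

# Per-request usage: __add__ merges usage from parts of a single response.
req = RequestUsage(input_tokens=40, output_tokens=5) + RequestUsage(
    input_tokens=16, output_tokens=2
)
assert req.total_tokens == 63  # total_tokens is a computed property now

# Run-level aggregate: incr() sums the token fields; the `requests` counter
# is only bumped when incrementing by another RunUsage.
run = RunUsage(requests=1)
run.incr(req)
assert (run.input_tokens, run.output_tokens) == (56, 7)

# New limit names; the old spellings still construct the same object.
limits = UsageLimits(input_tokens_limit=1000, output_tokens_limit=200)
limits.check_tokens(run)  # raises UsageLimitExceeded once a limit is crossed
legacy = UsageLimits(request_tokens_limit=1000, response_tokens_limit=200)
assert legacy.input_tokens_limit == 1000
```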
{pydantic_ai_slim-0.7.1.dist-info → pydantic_ai_slim-0.7.3.dist-info}/METADATA
CHANGED

@@ -1,7 +1,11 @@
 Metadata-Version: 2.4
 Name: pydantic-ai-slim
-Version: 0.7.1
+Version: 0.7.3
 Summary: Agent Framework / shim to use Pydantic with LLMs, slim package
+Project-URL: Homepage, https://github.com/pydantic/pydantic-ai/tree/main/pydantic_ai_slim
+Project-URL: Source, https://github.com/pydantic/pydantic-ai/tree/main/pydantic_ai_slim
+Project-URL: Documentation, https://ai.pydantic.dev/install/#slim-install
+Project-URL: Changelog, https://github.com/pydantic/pydantic-ai/releases
 Author-email: Samuel Colvin <samuel@pydantic.dev>, Marcelo Trylesinski <marcelotryle@gmail.com>, David Montague <david@pydantic.dev>, Alex Hall <alex@pydantic.dev>, Douwe Maan <douwe@pydantic.dev>
 License-Expression: MIT
 License-File: LICENSE
@@ -27,10 +31,11 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
 Requires-Dist: eval-type-backport>=0.2.0
 Requires-Dist: exceptiongroup; python_version < '3.11'
+Requires-Dist: genai-prices>=0.0.22
 Requires-Dist: griffe>=1.3.2
 Requires-Dist: httpx>=0.27
 Requires-Dist: opentelemetry-api>=1.28.0
-Requires-Dist: pydantic-graph==0.7.1
+Requires-Dist: pydantic-graph==0.7.3
 Requires-Dist: pydantic>=2.10
 Requires-Dist: typing-inspection>=0.4.0
 Provides-Extra: a2a
@@ -45,13 +50,14 @@ Requires-Dist: boto3>=1.39.0; extra == 'bedrock'
 Provides-Extra: cli
 Requires-Dist: argcomplete>=3.5.0; extra == 'cli'
 Requires-Dist: prompt-toolkit>=3; extra == 'cli'
+Requires-Dist: pyperclip>=1.9.0; extra == 'cli'
 Requires-Dist: rich>=13; extra == 'cli'
 Provides-Extra: cohere
 Requires-Dist: cohere>=5.16.0; (platform_system != 'Emscripten') and extra == 'cohere'
 Provides-Extra: duckduckgo
 Requires-Dist: ddgs>=9.0.0; extra == 'duckduckgo'
 Provides-Extra: evals
-Requires-Dist: pydantic-evals==0.7.1; extra == 'evals'
+Requires-Dist: pydantic-evals==0.7.3; extra == 'evals'
 Provides-Extra: google
 Requires-Dist: google-genai>=1.28.0; extra == 'google'
 Provides-Extra: groq
@@ -71,7 +77,7 @@ Requires-Dist: tenacity>=8.2.3; extra == 'retries'
 Provides-Extra: tavily
 Requires-Dist: tavily-python>=0.5.0; extra == 'tavily'
 Provides-Extra: temporal
-Requires-Dist: temporalio …
+Requires-Dist: temporalio==1.15.0; extra == 'temporal'
 Provides-Extra: vertexai
 Requires-Dist: google-auth>=2.36.0; extra == 'vertexai'
 Requires-Dist: requests>=2.32.2; extra == 'vertexai'