pydantic-ai-slim 1.2.1__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- pydantic_ai/__init__.py +6 -0
- pydantic_ai/_agent_graph.py +67 -20
- pydantic_ai/_cli.py +2 -2
- pydantic_ai/_output.py +20 -12
- pydantic_ai/_run_context.py +6 -2
- pydantic_ai/_utils.py +26 -8
- pydantic_ai/ag_ui.py +50 -696
- pydantic_ai/agent/__init__.py +13 -25
- pydantic_ai/agent/abstract.py +146 -9
- pydantic_ai/builtin_tools.py +106 -4
- pydantic_ai/direct.py +16 -4
- pydantic_ai/durable_exec/dbos/_agent.py +3 -0
- pydantic_ai/durable_exec/prefect/_agent.py +3 -0
- pydantic_ai/durable_exec/temporal/__init__.py +11 -0
- pydantic_ai/durable_exec/temporal/_agent.py +3 -0
- pydantic_ai/durable_exec/temporal/_function_toolset.py +23 -72
- pydantic_ai/durable_exec/temporal/_mcp_server.py +30 -30
- pydantic_ai/durable_exec/temporal/_run_context.py +7 -2
- pydantic_ai/durable_exec/temporal/_toolset.py +67 -3
- pydantic_ai/exceptions.py +6 -1
- pydantic_ai/mcp.py +1 -22
- pydantic_ai/messages.py +46 -8
- pydantic_ai/models/__init__.py +87 -38
- pydantic_ai/models/anthropic.py +132 -11
- pydantic_ai/models/bedrock.py +4 -4
- pydantic_ai/models/cohere.py +0 -7
- pydantic_ai/models/gemini.py +9 -2
- pydantic_ai/models/google.py +26 -23
- pydantic_ai/models/groq.py +13 -5
- pydantic_ai/models/huggingface.py +2 -2
- pydantic_ai/models/openai.py +251 -52
- pydantic_ai/models/outlines.py +563 -0
- pydantic_ai/models/test.py +6 -3
- pydantic_ai/profiles/openai.py +7 -0
- pydantic_ai/providers/__init__.py +25 -12
- pydantic_ai/providers/anthropic.py +2 -2
- pydantic_ai/providers/bedrock.py +60 -16
- pydantic_ai/providers/gateway.py +60 -72
- pydantic_ai/providers/google.py +91 -24
- pydantic_ai/providers/openrouter.py +3 -0
- pydantic_ai/providers/outlines.py +40 -0
- pydantic_ai/providers/ovhcloud.py +95 -0
- pydantic_ai/result.py +173 -8
- pydantic_ai/run.py +40 -24
- pydantic_ai/settings.py +8 -0
- pydantic_ai/tools.py +10 -6
- pydantic_ai/toolsets/fastmcp.py +215 -0
- pydantic_ai/ui/__init__.py +16 -0
- pydantic_ai/ui/_adapter.py +386 -0
- pydantic_ai/ui/_event_stream.py +591 -0
- pydantic_ai/ui/_messages_builder.py +28 -0
- pydantic_ai/ui/ag_ui/__init__.py +9 -0
- pydantic_ai/ui/ag_ui/_adapter.py +187 -0
- pydantic_ai/ui/ag_ui/_event_stream.py +236 -0
- pydantic_ai/ui/ag_ui/app.py +148 -0
- pydantic_ai/ui/vercel_ai/__init__.py +16 -0
- pydantic_ai/ui/vercel_ai/_adapter.py +199 -0
- pydantic_ai/ui/vercel_ai/_event_stream.py +187 -0
- pydantic_ai/ui/vercel_ai/_utils.py +16 -0
- pydantic_ai/ui/vercel_ai/request_types.py +275 -0
- pydantic_ai/ui/vercel_ai/response_types.py +230 -0
- pydantic_ai/usage.py +13 -2
- {pydantic_ai_slim-1.2.1.dist-info → pydantic_ai_slim-1.10.0.dist-info}/METADATA +23 -5
- {pydantic_ai_slim-1.2.1.dist-info → pydantic_ai_slim-1.10.0.dist-info}/RECORD +67 -49
- {pydantic_ai_slim-1.2.1.dist-info → pydantic_ai_slim-1.10.0.dist-info}/WHEEL +0 -0
- {pydantic_ai_slim-1.2.1.dist-info → pydantic_ai_slim-1.10.0.dist-info}/entry_points.txt +0 -0
- {pydantic_ai_slim-1.2.1.dist-info → pydantic_ai_slim-1.10.0.dist-info}/licenses/LICENSE +0 -0
pydantic_ai/models/outlines.py
ADDED

```python
# There are linting and coverage escapes for MLXLM and VLLMOffline as the CI would not contain the right
# environment to be able to run the associated tests

# pyright: reportUnnecessaryTypeIgnoreComment = false

from __future__ import annotations

import io
from collections.abc import AsyncIterable, AsyncIterator, Sequence
from contextlib import asynccontextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Literal, cast

from typing_extensions import assert_never

from .. import UnexpectedModelBehavior, _utils
from .._output import PromptedOutputSchema
from .._run_context import RunContext
from .._thinking_part import split_content_into_text_and_thinking
from ..exceptions import UserError
from ..messages import (
    BinaryContent,
    BuiltinToolCallPart,
    BuiltinToolReturnPart,
    FilePart,
    ImageUrl,
    ModelMessage,
    ModelRequest,
    ModelResponse,
    ModelResponsePart,
    ModelResponseStreamEvent,
    RetryPromptPart,
    SystemPromptPart,
    TextPart,
    ThinkingPart,
    ToolCallPart,
    ToolReturnPart,
    UserPromptPart,
)
from ..profiles import ModelProfile, ModelProfileSpec
from ..providers import Provider, infer_provider
from ..settings import ModelSettings
from . import (
    DownloadedItem,
    Model,
    ModelRequestParameters,
    StreamedResponse,
    download_item,
)

try:
    from outlines.inputs import Chat, Image
    from outlines.models.base import AsyncModel as OutlinesAsyncBaseModel, Model as OutlinesBaseModel
    from outlines.models.llamacpp import LlamaCpp, from_llamacpp
    from outlines.models.mlxlm import MLXLM, from_mlxlm
    from outlines.models.sglang import AsyncSGLang, SGLang, from_sglang
    from outlines.models.transformers import (
        Transformers,
        from_transformers,
    )
    from outlines.models.vllm_offline import (
        VLLMOffline,
        from_vllm_offline,  # pyright: ignore[reportUnknownVariableType]
    )
    from outlines.types.dsl import JsonSchema
    from PIL import Image as PILImage
except ImportError as _import_error:
    raise ImportError(
        'Please install `outlines` to use the Outlines model, '
        'you can use the `outlines` optional group — `pip install "pydantic-ai-slim[outlines]"`'
    ) from _import_error

if TYPE_CHECKING:
    import llama_cpp
    import mlx.nn as nn
    import transformers


@dataclass(init=False)
class OutlinesModel(Model):
    """A model that relies on the Outlines library to run non API-based models."""

    def __init__(
        self,
        model: OutlinesBaseModel | OutlinesAsyncBaseModel,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Initialize an Outlines model.

        Args:
            model: The Outlines model used for the model.
            provider: The provider to use for OutlinesModel. Can be either the string 'outlines' or an
                instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.
        """
        self.model: OutlinesBaseModel | OutlinesAsyncBaseModel = model
        self._model_name: str = 'outlines-model'

        if isinstance(provider, str):
            provider = infer_provider(provider)

        super().__init__(settings=settings, profile=profile or provider.model_profile)

    @classmethod
    def from_transformers(
        cls,
        hf_model: transformers.modeling_utils.PreTrainedModel,
        hf_tokenizer_or_processor: transformers.tokenization_utils.PreTrainedTokenizer
        | transformers.processing_utils.ProcessorMixin,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Create an Outlines model from a Hugging Face model and tokenizer.

        Args:
            hf_model: The Hugging Face PreTrainedModel or any model that is compatible with the
                `transformers` API.
            hf_tokenizer_or_processor: Either a HuggingFace `PreTrainedTokenizer` or any tokenizer that is compatible
                with the `transformers` API, or a HuggingFace processor inheriting from `ProcessorMixin`. If a
                tokenizer is provided, a regular model will be used, while if you provide a processor, it will be a
                multimodal model.
            provider: The provider to use for OutlinesModel. Can be either the string 'outlines' or an
                instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.
        """
        outlines_model: OutlinesBaseModel = from_transformers(hf_model, hf_tokenizer_or_processor)
        return cls(outlines_model, provider=provider, profile=profile, settings=settings)

    @classmethod
    def from_llamacpp(
        cls,
        llama_model: llama_cpp.Llama,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Create an Outlines model from a LlamaCpp model.

        Args:
            llama_model: The llama_cpp.Llama model to use.
            provider: The provider to use for OutlinesModel. Can be either the string 'outlines' or an
                instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.
        """
        outlines_model: OutlinesBaseModel = from_llamacpp(llama_model)
        return cls(outlines_model, provider=provider, profile=profile, settings=settings)

    @classmethod
    def from_mlxlm(  # pragma: no cover
        cls,
        mlx_model: nn.Module,
        mlx_tokenizer: transformers.tokenization_utils.PreTrainedTokenizer,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Create an Outlines model from a MLXLM model.

        Args:
            mlx_model: The nn.Module model to use.
            mlx_tokenizer: The PreTrainedTokenizer to use.
            provider: The provider to use for OutlinesModel. Can be either the string 'outlines' or an
                instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.
        """
        outlines_model: OutlinesBaseModel = from_mlxlm(mlx_model, mlx_tokenizer)
        return cls(outlines_model, provider=provider, profile=profile, settings=settings)

    @classmethod
    def from_sglang(
        cls,
        base_url: str,
        api_key: str | None = None,
        model_name: str | None = None,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Create an Outlines model to send requests to an SGLang server.

        Args:
            base_url: The url of the SGLang server.
            api_key: The API key to use for authenticating requests to the SGLang server.
            model_name: The name of the model to use.
            provider: The provider to use for OutlinesModel. Can be either the string 'outlines' or an
                instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.
        """
        try:
            from openai import AsyncOpenAI
        except ImportError as _import_error:
            raise ImportError(
                'Please install `openai` to use the Outlines SGLang model, '
                'you can use the `openai` optional group — `pip install "pydantic-ai-slim[openai]"`'
            ) from _import_error

        openai_client = AsyncOpenAI(base_url=base_url, api_key=api_key)
        outlines_model: OutlinesBaseModel | OutlinesAsyncBaseModel = from_sglang(openai_client, model_name)
        return cls(outlines_model, provider=provider, profile=profile, settings=settings)

    @classmethod
    def from_vllm_offline(  # pragma: no cover
        cls,
        vllm_model: Any,
        *,
        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Create an Outlines model from a vLLM offline inference model.

        Args:
            vllm_model: The vllm.LLM local model to use.
            provider: The provider to use for OutlinesModel. Can be either the string 'outlines' or an
                instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.
            profile: The model profile to use. Defaults to a profile picked by the provider.
            settings: Default model settings for this model instance.
        """
        outlines_model: OutlinesBaseModel | OutlinesAsyncBaseModel = from_vllm_offline(vllm_model)
        return cls(outlines_model, provider=provider, profile=profile, settings=settings)

    @property
    def model_name(self) -> str:
        return self._model_name

    @property
    def system(self) -> str:
        return 'outlines'

    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        """Make a request to the model."""
        prompt, output_type, inference_kwargs = await self._build_generation_arguments(
            messages, model_settings, model_request_parameters
        )
        # Async is available for SgLang
        response: str
        if isinstance(self.model, OutlinesAsyncBaseModel):
            response = await self.model(prompt, output_type, None, **inference_kwargs)
        else:
            response = self.model(prompt, output_type, None, **inference_kwargs)
        return self._process_response(response)

    @asynccontextmanager
    async def request_stream(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
        run_context: RunContext[Any] | None = None,
    ) -> AsyncIterator[StreamedResponse]:
        prompt, output_type, inference_kwargs = await self._build_generation_arguments(
            messages, model_settings, model_request_parameters
        )
        # Async is available for SgLang
        if isinstance(self.model, OutlinesAsyncBaseModel):
            response = self.model.stream(prompt, output_type, None, **inference_kwargs)
            yield await self._process_streamed_response(response, model_request_parameters)
        else:
            response = self.model.stream(prompt, output_type, None, **inference_kwargs)

            async def async_response():
                for chunk in response:
                    yield chunk

            yield await self._process_streamed_response(async_response(), model_request_parameters)

    async def _build_generation_arguments(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> tuple[Chat, JsonSchema | None, dict[str, Any]]:
        """Build the generation arguments for the model."""
        if (
            model_request_parameters.function_tools
            or model_request_parameters.builtin_tools
            or model_request_parameters.output_tools
        ):
            raise UserError('Outlines does not support function tools and builtin tools yet.')

        if model_request_parameters.output_object:
            instructions = PromptedOutputSchema.build_instructions(
                self.profile.prompted_output_template, model_request_parameters.output_object
            )
            output_type = JsonSchema(model_request_parameters.output_object.json_schema)
        else:
            instructions = None
            output_type = None

        prompt = await self._format_prompt(messages, instructions)
        inference_kwargs = self.format_inference_kwargs(model_settings)

        return prompt, output_type, inference_kwargs

    def format_inference_kwargs(self, model_settings: ModelSettings | None) -> dict[str, Any]:
        """Format the model settings for the inference kwargs."""
        settings_dict: dict[str, Any] = dict(model_settings) if model_settings else {}

        if isinstance(self.model, Transformers):
            settings_dict = self._format_transformers_inference_kwargs(settings_dict)
        elif isinstance(self.model, LlamaCpp):
            settings_dict = self._format_llama_cpp_inference_kwargs(settings_dict)
        elif isinstance(self.model, MLXLM):  # pragma: no cover
            settings_dict = self._format_mlxlm_inference_kwargs(settings_dict)
        elif isinstance(self.model, SGLang | AsyncSGLang):
            settings_dict = self._format_sglang_inference_kwargs(settings_dict)
        elif isinstance(self.model, VLLMOffline):  # pragma: no cover
            settings_dict = self._format_vllm_offline_inference_kwargs(settings_dict)

        extra_body = settings_dict.pop('extra_body', {})
        settings_dict.update(extra_body)

        return settings_dict

    def _format_transformers_inference_kwargs(self, model_settings: dict[str, Any]) -> dict[str, Any]:
        """Select the model settings supported by the Transformers model."""
        supported_args = [
            'max_tokens',
            'temperature',
            'top_p',
            'logit_bias',
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_llama_cpp_inference_kwargs(self, model_settings: dict[str, Any]) -> dict[str, Any]:
        """Select the model settings supported by the LlamaCpp model."""
        supported_args = [
            'max_tokens',
            'temperature',
            'top_p',
            'seed',
            'presence_penalty',
            'frequency_penalty',
            'logit_bias',
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_mlxlm_inference_kwargs(  # pragma: no cover
        self, model_settings: dict[str, Any]
    ) -> dict[str, Any]:
        """Select the model settings supported by the MLXLM model."""
        supported_args = [
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_sglang_inference_kwargs(self, model_settings: dict[str, Any]) -> dict[str, Any]:
        """Select the model settings supported by the SGLang model."""
        supported_args = [
            'max_tokens',
            'temperature',
            'top_p',
            'presence_penalty',
            'frequency_penalty',
            'extra_body',
        ]
        filtered_settings = {k: model_settings[k] for k in supported_args if k in model_settings}

        return filtered_settings

    def _format_vllm_offline_inference_kwargs(  # pragma: no cover
        self, model_settings: dict[str, Any]
    ) -> dict[str, Any]:
        """Select the model settings supported by the vLLMOffline model."""
        from vllm.sampling_params import SamplingParams  # pyright: ignore

        supported_args = [
            'max_tokens',
            'temperature',
            'top_p',
            'seed',
            'presence_penalty',
            'frequency_penalty',
            'logit_bias',
            'extra_body',
        ]
        # The arguments that are part of the fields of `ModelSettings` must be put in a `SamplingParams` object and
        # provided through the `sampling_params` argument to vLLM
        sampling_params = model_settings.get('extra_body', {}).pop('sampling_params', SamplingParams())

        for key in supported_args:
            setattr(sampling_params, key, model_settings.get(key, None))

        filtered_settings = {
            'sampling_params': sampling_params,
            **model_settings.get('extra_body', {}),
        }

        return filtered_settings

    async def _format_prompt(  # noqa: C901
        self, messages: list[ModelMessage], output_format_instructions: str | None
    ) -> Chat:
        """Turn the model messages into an Outlines Chat instance."""
        chat = Chat()

        if instructions := self._get_instructions(messages):
            chat.add_system_message(instructions)

        if output_format_instructions:
            chat.add_system_message(output_format_instructions)

        for message in messages:
            if isinstance(message, ModelRequest):
                for part in message.parts:
                    if isinstance(part, SystemPromptPart):
                        chat.add_system_message(part.content)
                    elif isinstance(part, UserPromptPart):
                        if isinstance(part.content, str):
                            chat.add_user_message(part.content)
                        elif isinstance(part.content, Sequence):
                            outlines_input: list[str | Image] = []
                            for item in part.content:
                                if isinstance(item, str):
                                    outlines_input.append(item)
                                elif isinstance(item, ImageUrl):
                                    image_content: DownloadedItem[bytes] = await download_item(
                                        item, data_format='bytes', type_format='mime'
                                    )
                                    image = self._create_PIL_image(image_content['data'], image_content['data_type'])
                                    outlines_input.append(Image(image))
                                elif isinstance(item, BinaryContent) and item.is_image:
                                    image = self._create_PIL_image(item.data, item.media_type)
                                    outlines_input.append(Image(image))
                                else:
                                    raise UserError(
                                        'Each element of the content sequence must be a string, an `ImageUrl`'
                                        + ' or a `BinaryImage`.'
                                    )
                            chat.add_user_message(outlines_input)
                        else:
                            assert_never(part.content)
                    elif isinstance(part, RetryPromptPart):
                        chat.add_user_message(part.model_response())
                    elif isinstance(part, ToolReturnPart):
                        raise UserError('Tool calls are not supported for Outlines models yet.')
                    else:
                        assert_never(part)
            elif isinstance(message, ModelResponse):
                text_parts: list[str] = []
                image_parts: list[Image] = []
                for part in message.parts:
                    if isinstance(part, TextPart):
                        text_parts.append(part.content)
                    elif isinstance(part, ThinkingPart):
                        # NOTE: We don't send ThinkingPart to the providers yet.
                        pass
                    elif isinstance(part, ToolCallPart | BuiltinToolCallPart | BuiltinToolReturnPart):
                        raise UserError('Tool calls are not supported for Outlines models yet.')
                    elif isinstance(part, FilePart):
                        if isinstance(part.content, BinaryContent) and part.content.is_image:
                            image = self._create_PIL_image(part.content.data, part.content.media_type)
                            image_parts.append(Image(image))
                        else:
                            raise UserError(
                                'File parts other than `BinaryImage` are not supported for Outlines models yet.'
                            )
                    else:
                        assert_never(part)
                if len(text_parts) == 1 and len(image_parts) == 0:
                    chat.add_assistant_message(text_parts[0])
                else:
                    chat.add_assistant_message([*text_parts, *image_parts])
            else:
                assert_never(message)
        return chat

    def _create_PIL_image(self, data: bytes, data_type: str) -> PILImage.Image:
        """Create a PIL Image from the data and data type."""
        image = PILImage.open(io.BytesIO(data))
        image.format = data_type.split('/')[-1]
        return image

    def _process_response(self, response: str) -> ModelResponse:
        """Turn the Outlines text response into a Pydantic AI model response instance."""
        return ModelResponse(
            parts=cast(
                list[ModelResponsePart], split_content_into_text_and_thinking(response, self.profile.thinking_tags)
            ),
        )

    async def _process_streamed_response(
        self, response: AsyncIterable[str], model_request_parameters: ModelRequestParameters
    ) -> StreamedResponse:
        """Turn the Outlines text response into a Pydantic AI streamed response instance."""
        peekable_response = _utils.PeekableAsyncStream(response)
        first_chunk = await peekable_response.peek()
        if isinstance(first_chunk, _utils.Unset):  # pragma: no cover
            raise UnexpectedModelBehavior('Streamed response ended without content or tool calls')

        timestamp = datetime.now(tz=timezone.utc)
        return OutlinesStreamedResponse(
            model_request_parameters=model_request_parameters,
            _model_name=self._model_name,
            _model_profile=self.profile,
            _response=peekable_response,
            _timestamp=timestamp,
            _provider_name='outlines',
        )


@dataclass
class OutlinesStreamedResponse(StreamedResponse):
    """Implementation of `StreamedResponse` for Outlines models."""

    _model_name: str
    _model_profile: ModelProfile
    _response: AsyncIterable[str]
    _timestamp: datetime
    _provider_name: str

    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        async for event in self._response:
            event = self._parts_manager.handle_text_delta(
                vendor_part_id='content',
                content=event,
                thinking_tags=self._model_profile.thinking_tags,
                ignore_leading_whitespace=self._model_profile.ignore_streamed_leading_whitespace,
            )
            if event is not None:  # pragma: no branch
                yield event

    @property
    def model_name(self) -> str:
        """Get the model name of the response."""
        return self._model_name

    @property
    def provider_name(self) -> str:
        """Get the provider name."""
        return self._provider_name

    @property
    def timestamp(self) -> datetime:
        """Get the timestamp of the response."""
        return self._timestamp
```
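The new `OutlinesModel` above can be built with any of the `from_*` constructors and then used like any other Pydantic AI model. A minimal usage sketch, assuming a local Hugging Face checkpoint (the model id and prompt are illustrative, not part of the diff):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from pydantic_ai import Agent
from pydantic_ai.models.outlines import OutlinesModel

# Illustrative checkpoint; any causal LM supported by `transformers` should work.
checkpoint = 'microsoft/Phi-3-mini-4k-instruct'
hf_model = AutoModelForCausalLM.from_pretrained(checkpoint)
hf_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Wrap the local model so an Agent can drive it through Outlines.
model = OutlinesModel.from_transformers(hf_model, hf_tokenizer)
agent = Agent(model)

result = agent.run_sync('What is the capital of France?')
print(result.output)
```

Note that, per `_build_generation_arguments`, runs that register function tools or builtin tools raise `UserError`; structured output is supported through the prompted-output path and Outlines' `JsonSchema` type.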
pydantic_ai/models/test.py
CHANGED

```diff
@@ -44,11 +44,14 @@ class _WrappedTextOutput:
     value: str | None
 
 
-@dataclass
+@dataclass(init=False)
 class _WrappedToolOutput:
     """A wrapper class to tag an output that came from the custom_output_args field."""
 
-    value: Any | None
+    value: dict[str, Any] | None
+
+    def __init__(self, value: Any | None):
+        self.value = pydantic_core.to_jsonable_python(value)
 
 
 @dataclass(init=False)
@@ -364,7 +367,7 @@ class _JsonSchemaTestData:
         self.defs = schema.get('$defs', {})
         self.seed = seed
 
-    def generate(self) -> Any:
+    def generate(self) -> dict[str, Any]:
         """Generate data for the JSON schema."""
         return self._gen_any(self.schema)
```
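`_WrappedToolOutput` now normalizes the wrapped value with `pydantic_core.to_jsonable_python`, so the test model replays JSON-safe data rather than arbitrary Python objects. A quick standalone illustration of that conversion (not part of the diff):

```python
from datetime import datetime

import pydantic_core

# Datetimes become ISO strings, sets become lists, and so on, so the value
# survives a JSON round-trip the way a real tool payload would.
print(pydantic_core.to_jsonable_python({'when': datetime(2025, 1, 1), 'n': 1}))
#> {'when': '2025-01-01T00:00:00', 'n': 1}
```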
pydantic_ai/profiles/openai.py
CHANGED

```diff
@@ -44,6 +44,13 @@ class OpenAIModelProfile(ModelProfile):
     openai_supports_encrypted_reasoning_content: bool = False
     """Whether the model supports including encrypted reasoning content in the response."""
 
+    openai_responses_requires_function_call_status_none: bool = False
+    """Whether the Responses API requires the `status` field on function tool calls to be `None`.
+
+    This is required by vLLM Responses API versions before https://github.com/vllm-project/vllm/pull/26706.
+    See https://github.com/pydantic/pydantic-ai/issues/3245 for more details.
+    """
+
     def __post_init__(self):  # pragma: no cover
         if not self.openai_supports_sampling_settings:
             warnings.warn(
```
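The new `openai_responses_requires_function_call_status_none` flag exists for OpenAI-compatible servers (notably older vLLM builds) whose Responses API rejects function tool calls that carry a `status`. A sketch of opting in manually, assuming a local vLLM server (the URL, key, and model name are placeholders):

```python
from pydantic_ai.models.openai import OpenAIResponsesModel
from pydantic_ai.profiles.openai import OpenAIModelProfile
from pydantic_ai.providers.openai import OpenAIProvider

model = OpenAIResponsesModel(
    'my-served-model',  # placeholder model name
    provider=OpenAIProvider(base_url='http://localhost:8000/v1', api_key='EMPTY'),
    # Send `status=None` on function tool calls, as pre-#26706 vLLM requires.
    profile=OpenAIModelProfile(openai_responses_requires_function_call_status_none=True),
)
```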
pydantic_ai/providers/__init__.py
CHANGED

```diff
@@ -8,7 +8,7 @@ from __future__ import annotations as _annotations
 from abc import ABC, abstractmethod
 from typing import Any, Generic, TypeVar
 
-from
+from ..profiles import ModelProfile
 
 InterfaceClient = TypeVar('InterfaceClient')
 
@@ -53,7 +53,7 @@ class Provider(ABC, Generic[InterfaceClient]):
 
 def infer_provider_class(provider: str) -> type[Provider[Any]]:  # noqa: C901
     """Infers the provider class from the provider name."""
-    if provider
+    if provider in ('openai', 'openai-chat', 'openai-responses'):
         from .openai import OpenAIProvider
 
         return OpenAIProvider
@@ -73,15 +73,10 @@ def infer_provider_class(provider: str) -> type[Provider[Any]]:  # noqa: C901
         from .azure import AzureProvider
 
         return AzureProvider
-    elif provider
-        from .
+    elif provider in ('google-vertex', 'google-gla'):
+        from .google import GoogleProvider
 
-        return
-    elif provider == 'google-gla':
-        from .google_gla import GoogleGLAProvider  # type: ignore[reportDeprecated]
-
-        return GoogleGLAProvider  # type: ignore[reportDeprecated]
-    # NOTE: We don't test because there are many ways the `boto3.client` can retrieve the credentials.
+        return GoogleProvider
     elif provider == 'bedrock':
         from .bedrock import BedrockProvider
 
@@ -146,11 +141,29 @@ def infer_provider_class(provider: str) -> type[Provider[Any]]:  # noqa: C901
         from .nebius import NebiusProvider
 
         return NebiusProvider
+    elif provider == 'ovhcloud':
+        from .ovhcloud import OVHcloudProvider
+
+        return OVHcloudProvider
+    elif provider == 'outlines':
+        from .outlines import OutlinesProvider
+
+        return OutlinesProvider
     else:  # pragma: no cover
         raise ValueError(f'Unknown provider: {provider}')
 
 
 def infer_provider(provider: str) -> Provider[Any]:
     """Infer the provider from the provider name."""
-    provider_class = infer_provider_class(provider)
-    return provider_class()
+    if provider.startswith('gateway/'):
+        from .gateway import gateway_provider
+
+        provider = provider.removeprefix('gateway/')
+        return gateway_provider(provider)
+    elif provider in ('google-vertex', 'google-gla'):
+        from .google import GoogleProvider
+
+        return GoogleProvider(vertexai=provider == 'google-vertex')
+    else:
+        provider_class = infer_provider_class(provider)
+        return provider_class()
```
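`infer_provider` now special-cases two name families before falling back to `infer_provider_class`: `gateway/`-prefixed names are delegated to `gateway_provider`, and both Google names resolve to the unified `GoogleProvider`. For example (the gateway call assumes gateway credentials are configured in the environment):

```python
from pydantic_ai.providers import infer_provider

infer_provider('outlines')        # OutlinesProvider()
infer_provider('google-vertex')   # GoogleProvider(vertexai=True)
infer_provider('google-gla')      # GoogleProvider(vertexai=False)
infer_provider('gateway/openai')  # built by gateway_provider('openai')
```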
pydantic_ai/providers/anthropic.py
CHANGED

```diff
@@ -12,7 +12,7 @@ from pydantic_ai.profiles.anthropic import anthropic_model_profile
 from pydantic_ai.providers import Provider
 
 try:
-    from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
+    from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, AsyncAnthropicVertex
 except ImportError as _import_error:
     raise ImportError(
         'Please install the `anthropic` package to use the Anthropic provider, '
@@ -20,7 +20,7 @@ except ImportError as _import_error:
     ) from _import_error
 
 
-AsyncAnthropicClient: TypeAlias = AsyncAnthropic | AsyncAnthropicBedrock
+AsyncAnthropicClient: TypeAlias = AsyncAnthropic | AsyncAnthropicBedrock | AsyncAnthropicVertex
 
 
 class AnthropicProvider(Provider[AsyncAnthropicClient]):
```
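Widening `AsyncAnthropicClient` to include `AsyncAnthropicVertex` means a Vertex AI-backed client can now be passed straight to `AnthropicProvider`. A sketch with placeholder GCP settings and model name:

```python
from anthropic import AsyncAnthropicVertex

from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.providers.anthropic import AnthropicProvider

# `region` and `project_id` are placeholders for your own GCP project.
client = AsyncAnthropicVertex(region='us-east5', project_id='my-gcp-project')
model = AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(anthropic_client=client))
```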