pydantic_ai_slim-0.2.16-py3-none-any.whl → pydantic_ai_slim-0.2.18-py3-none-any.whl

Diff of the published wheels. The main changes: a shared `download_item` helper replaces the ad-hoc `cached_async_http_client()` fetches in the Anthropic, Bedrock, Gemini, Google and OpenAI models; the Gemini and Google models can now pass file URLs by reference instead of always downloading them; the OpenTelemetry instrumentation gains a `gen_ai.client.token.usage` histogram; and the OpenAI models gain an `openai_service_tier` setting plus profile-based gating of sampling settings.
@@ -31,14 +31,7 @@ from ..profiles import ModelProfileSpec
 from ..providers import Provider, infer_provider
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
-from . import (
-    Model,
-    ModelRequestParameters,
-    StreamedResponse,
-    cached_async_http_client,
-    check_allow_model_requests,
-    get_user_agent,
-)
+from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests, download_item, get_user_agent

 try:
     from anthropic import NOT_GIVEN, APIStatusError, AsyncAnthropic, AsyncStream
@@ -283,7 +276,7 @@ class AnthropicModel(Model):
         tools += [self._map_tool_definition(r) for r in model_request_parameters.output_tools]
         return tools

-    async def _map_message(self, messages: list[ModelMessage]) -> tuple[str, list[BetaMessageParam]]:
+    async def _map_message(self, messages: list[ModelMessage]) -> tuple[str, list[BetaMessageParam]]:  # noqa: C901
         """Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
         system_prompt_parts: list[str] = []
         anthropic_messages: list[BetaMessageParam] = []
@@ -322,7 +315,8 @@ class AnthropicModel(Model):
                 assistant_content_params: list[BetaTextBlockParam | BetaToolUseBlockParam] = []
                 for response_part in m.parts:
                     if isinstance(response_part, TextPart):
-                        assistant_content_params.append(BetaTextBlockParam(text=response_part.content, type='text'))
+                        if response_part.content:  # Only add non-empty text
+                            assistant_content_params.append(BetaTextBlockParam(text=response_part.content, type='text'))
                     else:
                         tool_use_block_param = BetaToolUseBlockParam(
                             id=_guard_tool_call_id(t=response_part),
@@ -331,7 +325,8 @@ class AnthropicModel(Model):
                             input=response_part.args_as_dict(),
                         )
                         assistant_content_params.append(tool_use_block_param)
-                anthropic_messages.append(BetaMessageParam(role='assistant', content=assistant_content_params))
+                if len(assistant_content_params) > 0:
+                    anthropic_messages.append(BetaMessageParam(role='assistant', content=assistant_content_params))
             else:
                 assert_never(m)
         system_prompt = '\n\n'.join(system_prompt_parts)
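
The two new guards in `AnthropicModel._map_message` stop empty `text` blocks, and assistant messages whose `content` list ends up empty, from being sent at all; the Anthropic API rejects both. A minimal standalone sketch of the pattern (names hypothetical):

```python
from typing import TypedDict


class TextBlock(TypedDict):
    type: str
    text: str


def build_assistant_blocks(parts: list[str]) -> list[TextBlock]:
    """Drop empty text parts; the API rejects empty text blocks."""
    return [{'type': 'text', 'text': p} for p in parts if p]


# A message whose parts were all filtered out is skipped entirely,
# since an empty `content` list is also rejected.
blocks = build_assistant_blocks(['', 'Hello'])
if blocks:
    message = {'role': 'assistant', 'content': blocks}
```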
@@ -344,11 +339,13 @@ class AnthropicModel(Model):
         part: UserPromptPart,
     ) -> AsyncGenerator[BetaContentBlockParam]:
         if isinstance(part.content, str):
-            yield BetaTextBlockParam(text=part.content, type='text')
+            if part.content:  # Only yield non-empty text
+                yield BetaTextBlockParam(text=part.content, type='text')
         else:
             for item in part.content:
                 if isinstance(item, str):
-                    yield BetaTextBlockParam(text=item, type='text')
+                    if item:  # Only yield non-empty text
+                        yield BetaTextBlockParam(text=item, type='text')
                 elif isinstance(item, BinaryContent):
                     if item.is_image:
                         yield BetaImageBlockParam(
@@ -372,11 +369,10 @@ class AnthropicModel(Model):
                     if item.media_type == 'application/pdf':
                         yield BetaBase64PDFBlockParam(source={'url': item.url, 'type': 'url'}, type='document')
                     elif item.media_type == 'text/plain':
-                        response = await cached_async_http_client().get(item.url)
-                        response.raise_for_status()
+                        downloaded_item = await download_item(item, data_format='text')
                         yield BetaBase64PDFBlockParam(
                             source=BetaPlainTextSourceParam(
-                                data=response.text, media_type=item.media_type, type='text'
+                                data=downloaded_item['data'], media_type=item.media_type, type='text'
                             ),
                             type='document',
                         )
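
Here and in the hunks below, the inlined `cached_async_http_client()` fetches are replaced by a shared `download_item` helper. Judging from its call sites in this diff, it returns a mapping with `data` (in the requested `data_format`) and `data_type` keys; a rough standalone approximation under those assumptions, not the library's actual implementation:

```python
import base64

import httpx


async def download_item_sketch(url: str, data_format: str = 'bytes') -> dict[str, str | bytes]:
    """Approximate the download helper's behaviour as inferred from its call sites."""
    async with httpx.AsyncClient() as client:
        response = await client.get(url, follow_redirects=True)
        response.raise_for_status()
        media_type = response.headers.get('content-type', '').split(';')[0]
        if data_format == 'text':
            data: str | bytes = response.text
        elif data_format == 'base64':
            data = base64.b64encode(response.content).decode('utf-8')
        elif data_format == 'base64_uri':
            b64 = base64.b64encode(response.content).decode('utf-8')
            data = f'data:{media_type};base64,{b64}'
        else:
            data = response.content
        return {'data': data, 'data_type': media_type}
```

The next hunks move the Bedrock model over to the same helper.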
@@ -32,12 +32,7 @@ from pydantic_ai.messages import (
     UserPromptPart,
     VideoUrl,
 )
-from pydantic_ai.models import (
-    Model,
-    ModelRequestParameters,
-    StreamedResponse,
-    cached_async_http_client,
-)
+from pydantic_ai.models import Model, ModelRequestParameters, StreamedResponse, download_item
 from pydantic_ai.profiles import ModelProfileSpec
 from pydantic_ai.providers import Provider, infer_provider
 from pydantic_ai.providers.bedrock import BedrockModelProfile
@@ -55,6 +50,7 @@ if TYPE_CHECKING:
         ConverseResponseTypeDef,
         ConverseStreamMetadataEventTypeDef,
         ConverseStreamOutputTypeDef,
+        DocumentBlockTypeDef,
         GuardrailConfigurationTypeDef,
         ImageBlockTypeDef,
         InferenceConfigurationTypeDef,
@@ -507,25 +503,37 @@ class BedrockConverseModel(Model):
                 else:
                     raise NotImplementedError('Binary content is not supported yet.')
             elif isinstance(item, (ImageUrl, DocumentUrl, VideoUrl)):
-                response = await cached_async_http_client().get(item.url)
-                response.raise_for_status()
+                downloaded_item = await download_item(item, data_format='bytes', type_format='extension')
+                format = downloaded_item['data_type']
                 if item.kind == 'image-url':
                     format = item.media_type.split('/')[1]
                     assert format in ('jpeg', 'png', 'gif', 'webp'), f'Unsupported image format: {format}'
-                    image: ImageBlockTypeDef = {'format': format, 'source': {'bytes': response.content}}
+                    image: ImageBlockTypeDef = {'format': format, 'source': {'bytes': downloaded_item['data']}}
                     content.append({'image': image})

                 elif item.kind == 'document-url':
                     name = f'Document {next(document_count)}'
-                    data = response.content
-                    content.append({'document': {'name': name, 'format': item.format, 'source': {'bytes': data}}})
+                    document: DocumentBlockTypeDef = {
+                        'name': name,
+                        'format': item.format,
+                        'source': {'bytes': downloaded_item['data']},
+                    }
+                    content.append({'document': document})

                 elif item.kind == 'video-url':  # pragma: no branch
                     format = item.media_type.split('/')[1]
-                    assert format in ('mkv', 'mov', 'mp4', 'webm', 'flv', 'mpeg', 'mpg', 'wmv', 'three_gp'), (
-                        f'Unsupported video format: {format}'
-                    )
-                    video: VideoBlockTypeDef = {'format': format, 'source': {'bytes': response.content}}
+                    assert format in (
+                        'mkv',
+                        'mov',
+                        'mp4',
+                        'webm',
+                        'flv',
+                        'mpeg',
+                        'mpg',
+                        'wmv',
+                        'three_gp',
+                    ), f'Unsupported video format: {format}'
+                    video: VideoBlockTypeDef = {'format': format, 'source': {'bytes': downloaded_item['data']}}
                     content.append({'video': video})
             elif isinstance(item, AudioUrl):  # pragma: no cover
                 raise NotImplementedError('Audio is not supported yet.')
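
Besides adopting `download_item`, the Bedrock change gives the document payload an explicit `DocumentBlockTypeDef` annotation so the type checker can validate the structure against the botocore stubs instead of inferring an anonymous dict. A plain `TypedDict` stand-in for the shape being built (the real definition lives in the type stubs):

```python
from typing import TypedDict


class _DocumentSource(TypedDict):
    bytes: bytes


class _DocumentBlock(TypedDict):
    name: str
    format: str  # e.g. 'pdf' or 'txt'
    source: _DocumentSource


document: _DocumentBlock = {
    'name': 'Document 1',
    'format': 'pdf',
    'source': {'bytes': b'%PDF-1.4 ...'},
}
```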
@@ -17,10 +17,8 @@ from pydantic_ai.providers import Provider, infer_provider

 from .. import ModelHTTPError, UnexpectedModelBehavior, _utils, usage
 from ..messages import (
-    AudioUrl,
     BinaryContent,
-    DocumentUrl,
-    ImageUrl,
+    FileUrl,
     ModelMessage,
     ModelRequest,
     ModelResponse,
@@ -41,8 +39,8 @@ from . import (
     Model,
     ModelRequestParameters,
     StreamedResponse,
-    cached_async_http_client,
     check_allow_model_requests,
+    download_item,
     get_user_agent,
 )

@@ -348,15 +346,19 @@ class GeminiModel(Model):
                 content.append(
                     _GeminiInlineDataPart(inline_data={'data': base64_encoded, 'mime_type': item.media_type})
                 )
-            elif isinstance(item, (AudioUrl, ImageUrl, DocumentUrl, VideoUrl)):
-                client = cached_async_http_client()
-                response = await client.get(item.url, follow_redirects=True)
-                response.raise_for_status()
-                mime_type = response.headers['Content-Type'].split(';')[0]
-                inline_data = _GeminiInlineDataPart(
-                    inline_data={'data': base64.b64encode(response.content).decode('utf-8'), 'mime_type': mime_type}
-                )
-                content.append(inline_data)
+            elif isinstance(item, VideoUrl) and item.is_youtube:
+                file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
+                content.append(file_data)
+            elif isinstance(item, FileUrl):
+                if self.system == 'google-gla' or item.force_download:
+                    downloaded_item = await download_item(item, data_format='base64')
+                    inline_data = _GeminiInlineDataPart(
+                        inline_data={'data': downloaded_item['data'], 'mime_type': downloaded_item['data_type']}
+                    )
+                    content.append(inline_data)
+                else:
+                    file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
+                    content.append(file_data)
             else:
                 assert_never(item)
         return content
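
The Gemini change is behavioural, not just a refactor: YouTube URLs are now passed by reference as `file_data`, and other file URLs are only downloaded and inlined when the Generative Language API (`google-gla`) is in use or the part sets `force_download`; otherwise the URL itself is forwarded. A condensed sketch of the routing (hypothetical standalone function):

```python
def choose_file_strategy(system: str, is_youtube: bool, force_download: bool) -> str:
    """Mirror the branching above: pass by reference, or download and inline."""
    if is_youtube:
        return 'file_data'  # YouTube URLs are always passed by reference
    if system == 'google-gla' or force_download:
        return 'inline_data'  # fetch the file and embed it as base64
    return 'file_data'  # e.g. Vertex AI can fetch the URL itself


assert choose_file_strategy('google-gla', is_youtube=False, force_download=False) == 'inline_data'
assert choose_file_strategy('google-vertex', is_youtube=False, force_download=False) == 'file_data'
```

The same logic lands in `GoogleModel` below.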
@@ -14,10 +14,8 @@ from pydantic_ai.providers import Provider

 from .. import UnexpectedModelBehavior, _utils, usage
 from ..messages import (
-    AudioUrl,
     BinaryContent,
-    DocumentUrl,
-    ImageUrl,
+    FileUrl,
     ModelMessage,
     ModelRequest,
     ModelResponse,
@@ -38,8 +36,8 @@ from . import (
     Model,
     ModelRequestParameters,
     StreamedResponse,
-    cached_async_http_client,
     check_allow_model_requests,
+    download_item,
     get_user_agent,
 )

@@ -372,13 +370,15 @@ class GoogleModel(Model):
                 # NOTE: The type from Google GenAI is incorrect, it should be `str`, not `bytes`.
                 base64_encoded = base64.b64encode(item.data).decode('utf-8')
                 content.append({'inline_data': {'data': base64_encoded, 'mime_type': item.media_type}})  # type: ignore
-            elif isinstance(item, (AudioUrl, ImageUrl, DocumentUrl, VideoUrl)):
-                client = cached_async_http_client()
-                response = await client.get(item.url, follow_redirects=True)
-                response.raise_for_status()
-                # NOTE: The type from Google GenAI is incorrect, it should be `str`, not `bytes`.
-                base64_encoded = base64.b64encode(response.content).decode('utf-8')
-                content.append({'inline_data': {'data': base64_encoded, 'mime_type': item.media_type}})  # type: ignore
+            elif isinstance(item, VideoUrl) and item.is_youtube:
+                content.append({'file_data': {'file_uri': item.url, 'mime_type': item.media_type}})
+            elif isinstance(item, FileUrl):
+                if self.system == 'google-gla' or item.force_download:
+                    downloaded_item = await download_item(item, data_format='base64')
+                    inline_data = {'data': downloaded_item['data'], 'mime_type': downloaded_item['data_type']}
+                    content.append({'inline_data': inline_data})  # type: ignore
+                else:
+                    content.append({'file_data': {'file_uri': item.url, 'mime_type': item.media_type}})
             else:
                 assert_never(item)
         return content
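
From the caller's side this is driven by the URL part itself. A hedged usage sketch, assuming `FileUrl` subclasses such as `DocumentUrl` expose the `force_download` flag referenced in these hunks (model name and URL illustrative):

```python
from pydantic_ai import Agent
from pydantic_ai.messages import DocumentUrl

agent = Agent('google-vertex:gemini-1.5-pro')

result = agent.run_sync(
    [
        'Summarize this document.',
        # Force the file to be fetched and inlined as base64, even on a
        # backend that could otherwise pass the URL through by reference.
        DocumentUrl(url='https://example.com/report.pdf', force_download=True),
    ]
)
print(result.output)
```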
@@ -13,6 +13,7 @@ from opentelemetry._events import (
     EventLoggerProvider,  # pyright: ignore[reportPrivateImportUsage]
     get_event_logger_provider,  # pyright: ignore[reportPrivateImportUsage]
 )
+from opentelemetry.metrics import MeterProvider, get_meter_provider
 from opentelemetry.trace import Span, Tracer, TracerProvider, get_tracer_provider
 from opentelemetry.util.types import AttributeValue
 from pydantic import TypeAdapter
@@ -49,6 +50,10 @@ MODEL_SETTING_ATTRIBUTES: tuple[

 ANY_ADAPTER = TypeAdapter[Any](Any)

+# These are in the spec:
+# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclienttokenusage
+TOKEN_HISTOGRAM_BOUNDARIES = (1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864)
+

 def instrument_model(model: Model, instrument: InstrumentationSettings | bool) -> Model:
     """Instrument a model with OpenTelemetry/logfire."""
@@ -84,6 +89,7 @@ class InstrumentationSettings:
         *,
         event_mode: Literal['attributes', 'logs'] = 'attributes',
         tracer_provider: TracerProvider | None = None,
+        meter_provider: MeterProvider | None = None,
         event_logger_provider: EventLoggerProvider | None = None,
         include_binary_content: bool = True,
     ):
@@ -95,6 +101,9 @@ class InstrumentationSettings:
             tracer_provider: The OpenTelemetry tracer provider to use.
                 If not provided, the global tracer provider is used.
                 Calling `logfire.configure()` sets the global tracer provider, so most users don't need this.
+            meter_provider: The OpenTelemetry meter provider to use.
+                If not provided, the global meter provider is used.
+                Calling `logfire.configure()` sets the global meter provider, so most users don't need this.
             event_logger_provider: The OpenTelemetry event logger provider to use.
                 If not provided, the global event logger provider is used.
                 Calling `logfire.configure()` sets the global event logger provider, so most users don't need this.
@@ -104,12 +113,33 @@ class InstrumentationSettings:
         from pydantic_ai import __version__

         tracer_provider = tracer_provider or get_tracer_provider()
+        meter_provider = meter_provider or get_meter_provider()
         event_logger_provider = event_logger_provider or get_event_logger_provider()
-        self.tracer = tracer_provider.get_tracer('pydantic-ai', __version__)
-        self.event_logger = event_logger_provider.get_event_logger('pydantic-ai', __version__)
+        scope_name = 'pydantic-ai'
+        self.tracer = tracer_provider.get_tracer(scope_name, __version__)
+        self.meter = meter_provider.get_meter(scope_name, __version__)
+        self.event_logger = event_logger_provider.get_event_logger(scope_name, __version__)
         self.event_mode = event_mode
         self.include_binary_content = include_binary_content

+        # As specified in the OpenTelemetry GenAI metrics spec:
+        # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclienttokenusage
+        tokens_histogram_kwargs = dict(
+            name='gen_ai.client.token.usage',
+            unit='{token}',
+            description='Measures number of input and output tokens used',
+        )
+        try:
+            self.tokens_histogram = self.meter.create_histogram(
+                **tokens_histogram_kwargs,
+                explicit_bucket_boundaries_advisory=TOKEN_HISTOGRAM_BOUNDARIES,
+            )
+        except TypeError:
+            # Older OTel/logfire versions don't support explicit_bucket_boundaries_advisory
+            self.tokens_histogram = self.meter.create_histogram(
+                **tokens_histogram_kwargs,  # pyright: ignore
+            )
+
     def messages_to_otel_events(self, messages: list[ModelMessage]) -> list[Event]:
         """Convert a list of model messages to OpenTelemetry events.

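
`explicit_bucket_boundaries_advisory` only exists in newer OpenTelemetry SDKs, so the histogram is created with `try`/`except TypeError` feature detection rather than a hard version pin. The same pattern, reduced to a self-contained sketch:

```python
from opentelemetry.metrics import get_meter_provider

meter = get_meter_provider().get_meter('example')
kwargs = dict(
    name='gen_ai.client.token.usage',
    unit='{token}',
    description='Measures number of input and output tokens used',
)
try:
    # Newer SDKs accept bucket-boundary advice at creation time.
    histogram = meter.create_histogram(
        **kwargs,
        explicit_bucket_boundaries_advisory=[1, 4, 16, 64, 256, 1024],
    )
except TypeError:
    # Older SDKs raise TypeError for the unknown keyword; fall back.
    histogram = meter.create_histogram(**kwargs)

histogram.record(42, {'gen_ai.token.type': 'input'})
```

The boundaries themselves are the successive powers of 4 recommended by the GenAI semantic conventions linked above.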
@@ -224,38 +254,74 @@ class InstrumentedModel(WrapperModel):
             if isinstance(value := model_settings.get(key), (float, int)):
                 attributes[f'gen_ai.request.{key}'] = value

-        with self.settings.tracer.start_as_current_span(span_name, attributes=attributes) as span:
-
-            def finish(response: ModelResponse):
-                if not span.is_recording():
-                    return
-
-                events = self.settings.messages_to_otel_events(messages)
-                for event in self.settings.messages_to_otel_events([response]):
-                    events.append(
-                        Event(
-                            'gen_ai.choice',
-                            body={
-                                # TODO finish_reason
-                                'index': 0,
-                                'message': event.body,
-                            },
+        record_metrics: Callable[[], None] | None = None
+        try:
+            with self.settings.tracer.start_as_current_span(span_name, attributes=attributes) as span:
+
+                def finish(response: ModelResponse):
+                    # FallbackModel updates these span attributes.
+                    attributes.update(getattr(span, 'attributes', {}))
+                    request_model = attributes[GEN_AI_REQUEST_MODEL_ATTRIBUTE]
+                    system = attributes[GEN_AI_SYSTEM_ATTRIBUTE]
+
+                    response_model = response.model_name or request_model
+
+                    def _record_metrics():
+                        metric_attributes = {
+                            GEN_AI_SYSTEM_ATTRIBUTE: system,
+                            'gen_ai.operation.name': operation,
+                            'gen_ai.request.model': request_model,
+                            'gen_ai.response.model': response_model,
+                        }
+                        if response.usage.request_tokens:  # pragma: no branch
+                            self.settings.tokens_histogram.record(
+                                response.usage.request_tokens,
+                                {**metric_attributes, 'gen_ai.token.type': 'input'},
+                            )
+                        if response.usage.response_tokens:  # pragma: no branch
+                            self.settings.tokens_histogram.record(
+                                response.usage.response_tokens,
+                                {**metric_attributes, 'gen_ai.token.type': 'output'},
+                            )
+
+                    nonlocal record_metrics
+                    record_metrics = _record_metrics
+
+                    if not span.is_recording():
+                        return
+
+                    events = self.settings.messages_to_otel_events(messages)
+                    for event in self.settings.messages_to_otel_events([response]):
+                        events.append(
+                            Event(
+                                'gen_ai.choice',
+                                body={
+                                    # TODO finish_reason
+                                    'index': 0,
+                                    'message': event.body,
+                                },
+                            )
                         )
+                    span.set_attributes(
+                        {
+                            **response.usage.opentelemetry_attributes(),
+                            'gen_ai.response.model': response_model,
+                        }
                     )
-                new_attributes: dict[str, AttributeValue] = response.usage.opentelemetry_attributes()  # pyright: ignore[reportAssignmentType]
-                attributes.update(getattr(span, 'attributes', {}))
-                request_model = attributes[GEN_AI_REQUEST_MODEL_ATTRIBUTE]
-                new_attributes['gen_ai.response.model'] = response.model_name or request_model
-                span.set_attributes(new_attributes)
-                span.update_name(f'{operation} {request_model}')
-                for event in events:
-                    event.attributes = {
-                        GEN_AI_SYSTEM_ATTRIBUTE: attributes[GEN_AI_SYSTEM_ATTRIBUTE],
-                        **(event.attributes or {}),
-                    }
-                self._emit_events(span, events)
-
-        yield finish
+                    span.update_name(f'{operation} {request_model}')
+                    for event in events:
+                        event.attributes = {
+                            GEN_AI_SYSTEM_ATTRIBUTE: system,
+                            **(event.attributes or {}),
+                        }
+                    self._emit_events(span, events)
+
+            yield finish
+        finally:
+            if record_metrics:
+                # We only want to record metrics after the span is finished,
+                # to prevent them from being redundantly recorded in the span itself by logfire.
+                record_metrics()

     def _emit_events(self, span: Span, events: list[Event]) -> None:
         if self.settings.event_mode == 'logs':
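
The structural change here: the metric-recording work is captured in a `record_metrics` closure (set via `nonlocal` inside `finish`) and only invoked in the `finally` block, after the span context manager has exited, so logfire doesn't redundantly record the token counts on the still-open span. The control flow, reduced to a runnable toy:

```python
from collections.abc import Callable, Iterator
from contextlib import contextmanager


@contextmanager
def instrumented_request() -> Iterator[Callable[[int], None]]:
    record_metrics: Callable[[], None] | None = None
    try:
        print('span opened')  # stand-in for tracer.start_as_current_span(...)

        def finish(token_count: int) -> None:
            nonlocal record_metrics
            # Capture the values now, record them later.
            record_metrics = lambda: print(f'recorded {token_count} tokens')

        yield finish
        print('span closed')
    finally:
        if record_metrics:
            record_metrics()  # runs only after the span has ended


with instrumented_request() as finish:
    finish(123)
# prints: span opened, span closed, recorded 123 tokens
```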
@@ -40,8 +40,8 @@ from . import (
     Model,
     ModelRequestParameters,
     StreamedResponse,
-    cached_async_http_client,
     check_allow_model_requests,
+    download_item,
     get_user_agent,
 )

@@ -116,6 +116,13 @@ class OpenAIModelSettings(ModelSettings, total=False):
     See [OpenAI's safety best practices](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids) for more details.
     """

+    openai_service_tier: Literal['auto', 'default', 'flex']
+    """The service tier to use for the model request.
+
+    Currently supported values are `auto`, `default`, and `flex`.
+    For more information, see [OpenAI's service tiers documentation](https://platform.openai.com/docs/api-reference/chat/object#chat/object-service_tier).
+    """
+

 class OpenAIResponsesModelSettings(OpenAIModelSettings, total=False):
     """Settings used for an OpenAI Responses model request.
@@ -274,6 +281,12 @@ class OpenAIModel(Model):

         openai_messages = await self._map_messages(messages)

+        sampling_settings = (
+            model_settings
+            if OpenAIModelProfile.from_profile(self.profile).openai_supports_sampling_settings
+            else OpenAIModelSettings()
+        )
+
         try:
             extra_headers = model_settings.get('extra_headers', {})
             extra_headers.setdefault('User-Agent', get_user_agent())
287
300
  stream_options={'include_usage': True} if stream else NOT_GIVEN,
288
301
  stop=model_settings.get('stop_sequences', NOT_GIVEN),
289
302
  max_completion_tokens=model_settings.get('max_tokens', NOT_GIVEN),
290
- temperature=model_settings.get('temperature', NOT_GIVEN),
291
- top_p=model_settings.get('top_p', NOT_GIVEN),
292
303
  timeout=model_settings.get('timeout', NOT_GIVEN),
293
304
  seed=model_settings.get('seed', NOT_GIVEN),
294
- presence_penalty=model_settings.get('presence_penalty', NOT_GIVEN),
295
- frequency_penalty=model_settings.get('frequency_penalty', NOT_GIVEN),
296
- logit_bias=model_settings.get('logit_bias', NOT_GIVEN),
297
305
  reasoning_effort=model_settings.get('openai_reasoning_effort', NOT_GIVEN),
298
- logprobs=model_settings.get('openai_logprobs', NOT_GIVEN),
299
- top_logprobs=model_settings.get('openai_top_logprobs', NOT_GIVEN),
300
306
  user=model_settings.get('openai_user', NOT_GIVEN),
307
+ service_tier=model_settings.get('openai_service_tier', NOT_GIVEN),
308
+ temperature=sampling_settings.get('temperature', NOT_GIVEN),
309
+ top_p=sampling_settings.get('top_p', NOT_GIVEN),
310
+ presence_penalty=sampling_settings.get('presence_penalty', NOT_GIVEN),
311
+ frequency_penalty=sampling_settings.get('frequency_penalty', NOT_GIVEN),
312
+ logit_bias=sampling_settings.get('logit_bias', NOT_GIVEN),
313
+ logprobs=sampling_settings.get('openai_logprobs', NOT_GIVEN),
314
+ top_logprobs=sampling_settings.get('openai_top_logprobs', NOT_GIVEN),
301
315
  extra_headers=extra_headers,
302
316
  extra_body=model_settings.get('extra_body'),
303
317
  )
@@ -485,21 +499,21 @@ class OpenAIModel(Model):
485
499
  else: # pragma: no cover
486
500
  raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
487
501
  elif isinstance(item, AudioUrl):
488
- client = cached_async_http_client()
489
- response = await client.get(item.url)
490
- response.raise_for_status()
491
- base64_encoded = base64.b64encode(response.content).decode('utf-8')
492
- audio_format: Any = response.headers['content-type'].removeprefix('audio/')
493
- audio = InputAudio(data=base64_encoded, format=audio_format)
502
+ downloaded_item = await download_item(item, data_format='base64', type_format='extension')
503
+ assert downloaded_item['data_type'] in (
504
+ 'wav',
505
+ 'mp3',
506
+ ), f'Unsupported audio format: {downloaded_item["data_type"]}'
507
+ audio = InputAudio(data=downloaded_item['data'], format=downloaded_item['data_type'])
494
508
  content.append(ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio'))
495
509
  elif isinstance(item, DocumentUrl):
496
- client = cached_async_http_client()
497
- response = await client.get(item.url)
498
- response.raise_for_status()
499
- base64_encoded = base64.b64encode(response.content).decode('utf-8')
500
- media_type = response.headers.get('content-type').split(';')[0]
501
- file_data = f'data:{media_type};base64,{base64_encoded}'
502
- file = File(file=FileFile(file_data=file_data, filename=f'filename.{item.format}'), type='file')
510
+ downloaded_item = await download_item(item, data_format='base64_uri', type_format='extension')
511
+ file = File(
512
+ file=FileFile(
513
+ file_data=downloaded_item['data'], filename=f'filename.{downloaded_item["data_type"]}'
514
+ ),
515
+ type='file',
516
+ )
503
517
  content.append(file)
504
518
  elif isinstance(item, VideoUrl): # pragma: no cover
505
519
  raise NotImplementedError('VideoUrl is not supported for OpenAI')
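
`data_format='base64_uri'` asks the helper for a ready-made `data:` URI, so the call sites no longer assemble `f'data:{media_type};base64,{...}'` by hand. For reference, the packaging it replaces:

```python
import base64


def to_data_uri(content: bytes, media_type: str) -> str:
    """Build the data URI the removed inline code used to construct."""
    b64 = base64.b64encode(content).decode('utf-8')
    return f'data:{media_type};base64,{b64}'


assert to_data_uri(b'hello', 'text/plain') == 'data:text/plain;base64,aGVsbG8='
```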
@@ -599,7 +613,13 @@ class OpenAIResponsesModel(Model):
         for item in response.output:
             if item.type == 'function_call':
                 items.append(ToolCallPart(item.name, item.arguments, tool_call_id=item.call_id))
-        return ModelResponse(items, usage=_map_usage(response), model_name=response.model, timestamp=timestamp)
+        return ModelResponse(
+            items,
+            usage=_map_usage(response),
+            model_name=response.model,
+            vendor_id=response.id,
+            timestamp=timestamp,
+        )

     async def _process_streamed_response(
         self, response: AsyncStream[responses.ResponseStreamEvent]
@@ -656,6 +676,12 @@ class OpenAIResponsesModel(Model):
         instructions, openai_messages = await self._map_messages(messages)
         reasoning = self._get_reasoning(model_settings)

+        sampling_settings = (
+            model_settings
+            if OpenAIModelProfile.from_profile(self.profile).openai_supports_sampling_settings
+            else OpenAIResponsesModelSettings()
+        )
+
         try:
             extra_headers = model_settings.get('extra_headers', {})
             extra_headers.setdefault('User-Agent', get_user_agent())
@@ -668,8 +694,8 @@ class OpenAIResponsesModel(Model):
                 tool_choice=tool_choice or NOT_GIVEN,
                 max_output_tokens=model_settings.get('max_tokens', NOT_GIVEN),
                 stream=stream,
-                temperature=model_settings.get('temperature', NOT_GIVEN),
-                top_p=model_settings.get('top_p', NOT_GIVEN),
+                temperature=sampling_settings.get('temperature', NOT_GIVEN),
+                top_p=sampling_settings.get('top_p', NOT_GIVEN),
                 truncation=model_settings.get('openai_truncation', NOT_GIVEN),
                 timeout=model_settings.get('timeout', NOT_GIVEN),
                 reasoning=reasoning,
@@ -805,27 +831,21 @@ class OpenAIResponsesModel(Model):
                     responses.ResponseInputImageParam(image_url=item.url, type='input_image', detail='auto')
                 )
             elif isinstance(item, AudioUrl):  # pragma: no cover
-                client = cached_async_http_client()
-                response = await client.get(item.url)
-                response.raise_for_status()
-                base64_encoded = base64.b64encode(response.content).decode('utf-8')
+                downloaded_item = await download_item(item, data_format='base64_uri', type_format='extension')
                 content.append(
                     responses.ResponseInputFileParam(
                         type='input_file',
-                        file_data=f'data:{item.media_type};base64,{base64_encoded}',
+                        file_data=downloaded_item['data'],
+                        filename=f'filename.{downloaded_item["data_type"]}',
                     )
                 )
             elif isinstance(item, DocumentUrl):
-                client = cached_async_http_client()
-                response = await client.get(item.url)
-                response.raise_for_status()
-                base64_encoded = base64.b64encode(response.content).decode('utf-8')
-                media_type = response.headers.get('content-type').split(';')[0]
+                downloaded_item = await download_item(item, data_format='base64_uri', type_format='extension')
                 content.append(
                     responses.ResponseInputFileParam(
                         type='input_file',
-                        file_data=f'data:{media_type};base64,{base64_encoded}',
-                        filename=f'filename.{item.format}',
+                        file_data=downloaded_item['data'],
+                        filename=f'filename.{downloaded_item["data_type"]}',
                    )
                 )
             elif isinstance(item, VideoUrl):  # pragma: no cover
@@ -15,13 +15,20 @@ class OpenAIModelProfile(ModelProfile):
     ALL FIELDS MUST BE `openai_` PREFIXED SO YOU CAN MERGE THEM WITH OTHER MODELS.
     """

-    # This can be set by a provider or user if the OpenAI-"compatible" API doesn't support strict tool definitions
     openai_supports_strict_tool_definition: bool = True
+    """This can be set by a provider or user if the OpenAI-"compatible" API doesn't support strict tool definitions."""
+
+    openai_supports_sampling_settings: bool = True
+    """Turn off to not send sampling settings like `temperature` and `top_p` to models that don't support them, such as OpenAI's o-series reasoning models."""


 def openai_model_profile(model_name: str) -> ModelProfile:
     """Get the model profile for an OpenAI model."""
-    return OpenAIModelProfile(json_schema_transformer=OpenAIJsonSchemaTransformer)
+    is_reasoning_model = model_name.startswith('o')
+    return OpenAIModelProfile(
+        json_schema_transformer=OpenAIJsonSchemaTransformer,
+        openai_supports_sampling_settings=not is_reasoning_model,
+    )


 _STRICT_INCOMPATIBLE_KEYS = [
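
The `startswith('o')` heuristic catches the o-series names (`o1`, `o3-mini`, `o4-mini`, ...) while leaving names that merely contain an `o`, such as `gpt-4o`, untouched:

```python
# Quick check of the reasoning-model heuristic used above.
for name, expected in [
    ('o1', True),
    ('o3-mini', True),
    ('o4-mini', True),
    ('gpt-4o', False),  # contains 'o' but doesn't start with it
]:
    assert name.startswith('o') is expected
```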
@@ -84,7 +84,7 @@ class GoogleProvider(Provider[genai.Client]):
         """
         if client is None:
             # NOTE: We are keeping GEMINI_API_KEY for backwards compatibility.
-            api_key = api_key or os.environ.get('GOOGLE_API_KEY')
+            api_key = api_key or os.getenv('GOOGLE_API_KEY') or os.getenv('GEMINI_API_KEY')

             if vertexai is None:  # pragma: lax no cover
                 vertexai = bool(location or project or credentials)
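
Finally, the Google provider now honours the documented `GEMINI_API_KEY` fallback; an explicit `api_key` argument still takes precedence:

```python
import os

# Equivalent resolution order: explicit argument, then GOOGLE_API_KEY,
# then the legacy GEMINI_API_KEY.
api_key = None  # i.e. no api_key passed to GoogleProvider(...)
api_key = api_key or os.getenv('GOOGLE_API_KEY') or os.getenv('GEMINI_API_KEY')
```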