pydantic-ai-slim 1.0.14__py3-none-any.whl → 1.0.16__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Files changed (40)
  1. pydantic_ai/__init__.py +19 -1
  2. pydantic_ai/_agent_graph.py +129 -105
  3. pydantic_ai/_cli.py +7 -10
  4. pydantic_ai/_output.py +236 -192
  5. pydantic_ai/_parts_manager.py +8 -42
  6. pydantic_ai/_tool_manager.py +9 -16
  7. pydantic_ai/agent/__init__.py +18 -7
  8. pydantic_ai/agent/abstract.py +192 -23
  9. pydantic_ai/agent/wrapper.py +7 -4
  10. pydantic_ai/builtin_tools.py +82 -0
  11. pydantic_ai/direct.py +16 -9
  12. pydantic_ai/durable_exec/dbos/_agent.py +124 -18
  13. pydantic_ai/durable_exec/temporal/_agent.py +139 -19
  14. pydantic_ai/durable_exec/temporal/_model.py +8 -0
  15. pydantic_ai/format_prompt.py +9 -6
  16. pydantic_ai/mcp.py +20 -10
  17. pydantic_ai/messages.py +214 -44
  18. pydantic_ai/models/__init__.py +15 -1
  19. pydantic_ai/models/anthropic.py +27 -22
  20. pydantic_ai/models/cohere.py +4 -0
  21. pydantic_ai/models/function.py +7 -4
  22. pydantic_ai/models/gemini.py +8 -0
  23. pydantic_ai/models/google.py +56 -23
  24. pydantic_ai/models/groq.py +11 -5
  25. pydantic_ai/models/huggingface.py +5 -3
  26. pydantic_ai/models/mistral.py +6 -8
  27. pydantic_ai/models/openai.py +206 -58
  28. pydantic_ai/models/test.py +4 -0
  29. pydantic_ai/output.py +5 -2
  30. pydantic_ai/profiles/__init__.py +2 -0
  31. pydantic_ai/profiles/google.py +5 -2
  32. pydantic_ai/profiles/openai.py +2 -1
  33. pydantic_ai/result.py +51 -35
  34. pydantic_ai/run.py +35 -7
  35. pydantic_ai/usage.py +40 -5
  36. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.16.dist-info}/METADATA +4 -4
  37. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.16.dist-info}/RECORD +40 -40
  38. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.16.dist-info}/WHEEL +0 -0
  39. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.16.dist-info}/entry_points.txt +0 -0
  40. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.16.dist-info}/licenses/LICENSE +0 -0
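
The thread running through these changes is built-in image generation support: this release range adds an `ImageGenerationTool` to `pydantic_ai/builtin_tools.py` and new `FilePart`/`BinaryImage` message parts, then wires them through the OpenAI models below. For orientation, usage would presumably look like the following sketch (not taken from the diff; the model name and the way generated files surface on the run result are assumptions based on the imports shown here):

from pydantic_ai import Agent
from pydantic_ai.builtin_tools import ImageGenerationTool
from pydantic_ai.messages import FilePart, ModelResponse

# Assumed model name; any OpenAI Responses API model with image generation should work.
agent = Agent('openai-responses:gpt-5', builtin_tools=[ImageGenerationTool()])

result = agent.run_sync('Draw a sketch of a hedgehog.')
# Generated images arrive as FilePart items on the model response, alongside
# BuiltinToolCallPart/BuiltinToolReturnPart (see _process_response below).
for message in result.all_messages():
    if isinstance(message, ModelResponse):
        for part in message.parts:
            if isinstance(part, FilePart):
                print(part.content.media_type, len(part.content.data))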
pydantic_ai/models/openai.py CHANGED
@@ -17,14 +17,16 @@ from .._output import DEFAULT_OUTPUT_TOOL_NAME, OutputObjectDefinition
 from .._run_context import RunContext
 from .._thinking_part import split_content_into_text_and_thinking
 from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime
-from ..builtin_tools import CodeExecutionTool, WebSearchTool
+from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, WebSearchTool
 from ..exceptions import UserError
 from ..messages import (
     AudioUrl,
     BinaryContent,
+    BinaryImage,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
     DocumentUrl,
+    FilePart,
     FinishReason,
     ImageUrl,
     ModelMessage,
@@ -678,6 +680,9 @@ class OpenAIChatModel(Model):
                 # OpenAI doesn't return built-in tool calls
                 elif isinstance(item, BuiltinToolCallPart | BuiltinToolReturnPart):  # pragma: no cover
                     pass
+                elif isinstance(item, FilePart):  # pragma: no cover
+                    # Files generated by models are not sent back to models that don't themselves generate files.
+                    pass
                 else:
                     assert_never(item)
             message_param = chat.ChatCompletionAssistantMessageParam(role='assistant')
@@ -771,6 +776,9 @@ class OpenAIChatModel(Model):
                image_url: ImageURL = {'url': item.url}
                if metadata := item.vendor_metadata:
                    image_url['detail'] = metadata.get('detail', 'auto')
+               if item.force_download:
+                   image_content = await download_item(item, data_format='base64_uri', type_format='extension')
+                   image_url['url'] = image_content['data']
                content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
            elif isinstance(item, BinaryContent):
                if self._is_text_like_media_type(item.media_type):
@@ -782,31 +790,27 @@ class OpenAIChatModel(Model):
                            identifier=item.identifier,
                        )
                    )
-               else:
-                   base64_encoded = base64.b64encode(item.data).decode('utf-8')
-                   if item.is_image:
-                       image_url: ImageURL = {'url': f'data:{item.media_type};base64,{base64_encoded}'}
-                       if metadata := item.vendor_metadata:
-                           image_url['detail'] = metadata.get('detail', 'auto')
-                       content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
-                   elif item.is_audio:
-                       assert item.format in ('wav', 'mp3')
-                       audio = InputAudio(data=base64_encoded, format=item.format)
-                       content.append(
-                           ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio')
-                       )
-                   elif item.is_document:
-                       content.append(
-                           File(
-                               file=FileFile(
-                                   file_data=f'data:{item.media_type};base64,{base64_encoded}',
-                                   filename=f'filename.{item.format}',
-                               ),
-                               type='file',
-                           )
+               elif item.is_image:
+                   image_url = ImageURL(url=item.data_uri)
+                   if metadata := item.vendor_metadata:
+                       image_url['detail'] = metadata.get('detail', 'auto')
+                   content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
+               elif item.is_audio:
+                   assert item.format in ('wav', 'mp3')
+                   audio = InputAudio(data=base64.b64encode(item.data).decode('utf-8'), format=item.format)
+                   content.append(ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio'))
+               elif item.is_document:
+                   content.append(
+                       File(
+                           file=FileFile(
+                               file_data=item.data_uri,
+                               filename=f'filename.{item.format}',
+                           ),
+                           type='file',
                        )
-                   else:  # pragma: no cover
-                       raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
+                   )
+               else:  # pragma: no cover
+                   raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
            elif isinstance(item, AudioUrl):
                downloaded_item = await download_item(item, data_format='base64', type_format='extension')
                assert downloaded_item['data_type'] in (
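
A recurring refactor in this file: inline `f'data:{item.media_type};base64,...'` strings are replaced by a `data_uri` property on the content types. `messages.py`'s implementation is not shown in this diff, but judging by the removed lines it presumably reduces to something like this sketch (`_BinaryContentSketch` is a stand-in name):

import base64
from dataclasses import dataclass

@dataclass
class _BinaryContentSketch:
    data: bytes
    media_type: str

    @property
    def data_uri(self) -> str:
        # Matches the inline f'data:{media_type};base64,...' strings this diff removes.
        return f'data:{self.media_type};base64,{base64.b64encode(self.data).decode("utf-8")}'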
@@ -941,7 +945,7 @@ class OpenAIResponsesModel(Model):
        response = await self._responses_create(
            messages, False, cast(OpenAIResponsesModelSettings, model_settings or {}), model_request_parameters
        )
-       return self._process_response(response)
+       return self._process_response(response, model_request_parameters)

    @asynccontextmanager
    async def request_stream(
@@ -962,7 +966,9 @@ class OpenAIResponsesModel(Model):
        async with response:
            yield await self._process_streamed_response(response, model_request_parameters)

-   def _process_response(self, response: responses.Response) -> ModelResponse:  # noqa: C901
+   def _process_response(  # noqa: C901
+       self, response: responses.Response, model_request_parameters: ModelRequestParameters
+   ) -> ModelResponse:
        """Process a non-streamed response, and prepare a message to return."""
        timestamp = number_to_datetime(response.created_at)
        items: list[ModelResponsePart] = []
@@ -1002,19 +1008,24 @@ class OpenAIResponsesModel(Model):
                    ToolCallPart(item.name, item.arguments, tool_call_id=_combine_tool_call_ids(item.call_id, item.id))
                )
            elif isinstance(item, responses.ResponseCodeInterpreterToolCall):
-               call_part, return_part = _map_code_interpreter_tool_call(item, self.system)
+               call_part, return_part, file_parts = _map_code_interpreter_tool_call(item, self.system)
                items.append(call_part)
+               if file_parts:
+                   items.extend(file_parts)
                items.append(return_part)
            elif isinstance(item, responses.ResponseFunctionWebSearch):
                call_part, return_part = _map_web_search_tool_call(item, self.system)
                items.append(call_part)
                items.append(return_part)
+           elif isinstance(item, responses.response_output_item.ImageGenerationCall):
+               call_part, return_part, file_part = _map_image_generation_tool_call(item, self.system)
+               items.append(call_part)
+               if file_part:  # pragma: no branch
+                   items.append(file_part)
+               items.append(return_part)
            elif isinstance(item, responses.ResponseComputerToolCall):  # pragma: no cover
                # Pydantic AI doesn't yet support the ComputerUse built-in tool
                pass
-           elif isinstance(item, responses.response_output_item.ImageGenerationCall):  # pragma: no cover
-               # Pydantic AI doesn't yet support the ImageGeneration built-in tool
-               pass
            elif isinstance(item, responses.ResponseCustomToolCall):  # pragma: no cover
                # Support is being implemented in https://github.com/pydantic/pydantic-ai/pull/2572
                pass
@@ -1204,6 +1215,7 @@ class OpenAIResponsesModel(Model):

    def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -> list[responses.ToolParam]:
        tools: list[responses.ToolParam] = []
+       has_image_generating_tool = False
        for tool in model_request_parameters.builtin_tools:
            if isinstance(tool, WebSearchTool):
                web_search_tool = responses.WebSearchToolParam(
@@ -1214,12 +1226,31 @@ class OpenAIResponsesModel(Model):
                        type='approximate', **tool.user_location
                    )
                tools.append(web_search_tool)
-           elif isinstance(tool, CodeExecutionTool):  # pragma: no branch
+           elif isinstance(tool, CodeExecutionTool):
+               has_image_generating_tool = True
                tools.append({'type': 'code_interpreter', 'container': {'type': 'auto'}})
+           elif isinstance(tool, ImageGenerationTool):  # pragma: no branch
+               has_image_generating_tool = True
+               tools.append(
+                   responses.tool_param.ImageGeneration(
+                       type='image_generation',
+                       background=tool.background,
+                       input_fidelity=tool.input_fidelity,
+                       moderation=tool.moderation,
+                       output_compression=tool.output_compression,
+                       output_format=tool.output_format or 'png',
+                       partial_images=tool.partial_images,
+                       quality=tool.quality,
+                       size=tool.size,
+                   )
+               )
            else:
                raise UserError(  # pragma: no cover
                    f'`{tool.__class__.__name__}` is not supported by `OpenAIResponsesModel`. If it should be, please file an issue.'
                )
+
+       if model_request_parameters.allow_image_output and not has_image_generating_tool:
+           tools.append({'type': 'image_generation'})
        return tools

    def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam:
@@ -1282,8 +1313,7 @@ class OpenAIResponsesModel(Model):
                    )
                    openai_messages.append(item)
                elif isinstance(part, RetryPromptPart):
-                   # TODO(Marcelo): How do we test this conditional branch?
-                   if part.tool_name is None:  # pragma: no cover
+                   if part.tool_name is None:
                        openai_messages.append(
                            Message(role='user', content=[{'type': 'input_text', 'text': part.model_response()}])
                        )
@@ -1342,7 +1372,7 @@ class OpenAIResponsesModel(Model):
                            param['id'] = id
                        openai_messages.append(param)
                    elif isinstance(item, BuiltinToolCallPart):
-                       if item.provider_name == self.system:
+                       if item.provider_name == self.system and send_item_ids:
                            if (
                                item.tool_name == CodeExecutionTool.kind
                                and item.tool_call_id
@@ -1353,7 +1383,7 @@ class OpenAIResponsesModel(Model):
                                    id=item.tool_call_id,
                                    code=args.get('code'),
                                    container_id=container_id,
-                                   outputs=None,
+                                   outputs=None,  # These can be read server-side
                                    status='completed',
                                    type='code_interpreter_call',
                                )
@@ -1362,7 +1392,7 @@ class OpenAIResponsesModel(Model):
                                item.tool_name == WebSearchTool.kind
                                and item.tool_call_id
                                and (args := item.args_as_dict())
-                           ):  # pragma: no branch
+                           ):
                                web_search_item = responses.ResponseFunctionWebSearchParam(
                                    id=item.tool_call_id,
                                    action=cast(responses.response_function_web_search_param.Action, args),
@@ -1370,8 +1400,18 @@ class OpenAIResponsesModel(Model):
                                    type='web_search_call',
                                )
                                openai_messages.append(web_search_item)
+                           elif item.tool_name == ImageGenerationTool.kind and item.tool_call_id:  # pragma: no branch
+                               # The cast is necessary because of https://github.com/openai/openai-python/issues/2648
+                               image_generation_item = cast(
+                                   responses.response_input_item_param.ImageGenerationCall,
+                                   {
+                                       'id': item.tool_call_id,
+                                       'type': 'image_generation_call',
+                                   },
+                               )
+                               openai_messages.append(image_generation_item)
                    elif isinstance(item, BuiltinToolReturnPart):
-                       if item.provider_name == self.system:
+                       if item.provider_name == self.system and send_item_ids:
                            if (
                                item.tool_name == CodeExecutionTool.kind
                                and code_interpreter_item is not None
@@ -1379,7 +1419,6 @@ class OpenAIResponsesModel(Model):
                                and (content := cast(dict[str, Any], item.content))  # pyright: ignore[reportUnknownMemberType]
                                and (status := content.get('status'))
                            ):
-                               code_interpreter_item['outputs'] = content.get('outputs')
                                code_interpreter_item['status'] = status
                            elif (
                                item.tool_name == WebSearchTool.kind
@@ -1387,8 +1426,16 @@ class OpenAIResponsesModel(Model):
                                and isinstance(item.content, dict)  # pyright: ignore[reportUnknownMemberType]
                                and (content := cast(dict[str, Any], item.content))  # pyright: ignore[reportUnknownMemberType]
                                and (status := content.get('status'))
-                           ):  # pragma: no branch
+                           ):
                                web_search_item['status'] = status
+                           elif item.tool_name == ImageGenerationTool.kind:  # pragma: no branch
+                               # Image generation result does not need to be sent back, just the `id` off of `BuiltinToolCallPart`.
+                               pass
+                   elif isinstance(item, FilePart):
+                       # This was generated by the `ImageGenerationTool` or `CodeExecutionTool`,
+                       # and does not need to be sent back separately from the corresponding `BuiltinToolReturnPart`.
+                       # If `send_item_ids` is false, we won't send the `BuiltinToolReturnPart`, but OpenAI does not have a type for files from the assistant.
+                       pass
                    elif isinstance(item, ThinkingPart):
                        if item.id and send_item_ids:
                            signature: str | None = None
@@ -1454,7 +1501,6 @@ class OpenAIResponsesModel(Model):
            if isinstance(item, str):
                content.append(responses.ResponseInputTextParam(text=item, type='input_text'))
            elif isinstance(item, BinaryContent):
-               base64_encoded = base64.b64encode(item.data).decode('utf-8')
                if item.is_image:
                    detail: Literal['auto', 'low', 'high'] = 'auto'
                    if metadata := item.vendor_metadata:
@@ -1464,7 +1510,7 @@ class OpenAIResponsesModel(Model):
                    )
                    content.append(
                        responses.ResponseInputImageParam(
-                           image_url=f'data:{item.media_type};base64,{base64_encoded}',
+                           image_url=item.data_uri,
                            type='input_image',
                            detail=detail,
                        )
@@ -1473,7 +1519,7 @@ class OpenAIResponsesModel(Model):
                    content.append(
                        responses.ResponseInputFileParam(
                            type='input_file',
-                           file_data=f'data:{item.media_type};base64,{base64_encoded}',
+                           file_data=item.data_uri,
                            # NOTE: Type wise it's not necessary to include the filename, but it's required by the
                            # API itself. If we add empty string, the server sends a 500 error - which OpenAI needs
                            # to fix. In any case, we add a placeholder name.
@@ -1486,11 +1532,16 @@ class OpenAIResponsesModel(Model):
                    raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
            elif isinstance(item, ImageUrl):
                detail: Literal['auto', 'low', 'high'] = 'auto'
+               image_url = item.url
                if metadata := item.vendor_metadata:
                    detail = cast(Literal['auto', 'low', 'high'], metadata.get('detail', 'auto'))
+               if item.force_download:
+                   downloaded_item = await download_item(item, data_format='base64_uri', type_format='extension')
+                   image_url = downloaded_item['data']
+
                content.append(
                    responses.ResponseInputImageParam(
-                       image_url=item.url,
+                       image_url=image_url,
                        type='input_image',
                        detail=detail,
                    )
@@ -1681,18 +1732,18 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                pass
            elif isinstance(chunk.item, responses.ResponseFunctionWebSearch):
                call_part, _ = _map_web_search_tool_call(chunk.item, self.provider_name)
-               yield self._parts_manager.handle_builtin_tool_call_part(
+               yield self._parts_manager.handle_part(
                    vendor_part_id=f'{chunk.item.id}-call', part=replace(call_part, args=None)
                )
            elif isinstance(chunk.item, responses.ResponseCodeInterpreterToolCall):
-               call_part, _ = _map_code_interpreter_tool_call(chunk.item, self.provider_name)
+               call_part, _, _ = _map_code_interpreter_tool_call(chunk.item, self.provider_name)

                args_json = call_part.args_as_json_str()
                # Drop the final `"}` so that we can add code deltas
                args_json_delta = args_json[:-2]
                assert args_json_delta.endswith('code":"')

-               yield self._parts_manager.handle_builtin_tool_call_part(
+               yield self._parts_manager.handle_part(
                    vendor_part_id=f'{chunk.item.id}-call', part=replace(call_part, args=None)
                )
                maybe_event = self._parts_manager.handle_tool_call_delta(
@@ -1701,6 +1752,10 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                )
                if maybe_event is not None:  # pragma: no branch
                    yield maybe_event
+           elif isinstance(chunk.item, responses.response_output_item.ImageGenerationCall):
+               call_part, _, _ = _map_image_generation_tool_call(chunk.item, self.provider_name)
+               yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-call', part=call_part)
+
            else:
                warnings.warn(  # pragma: no cover
                    f'Handling of this item type is not yet implemented. Please report on our GitHub: {chunk}',
@@ -1718,10 +1773,12 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                    provider_name=self.provider_name,
                )
            elif isinstance(chunk.item, responses.ResponseCodeInterpreterToolCall):
-               _, return_part = _map_code_interpreter_tool_call(chunk.item, self.provider_name)
-               yield self._parts_manager.handle_builtin_tool_return_part(
-                   vendor_part_id=f'{chunk.item.id}-return', part=return_part
-               )
+               _, return_part, file_parts = _map_code_interpreter_tool_call(chunk.item, self.provider_name)
+               for i, file_part in enumerate(file_parts):
+                   yield self._parts_manager.handle_part(
+                       vendor_part_id=f'{chunk.item.id}-file-{i}', part=file_part
+                   )
+               yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-return', part=return_part)
            elif isinstance(chunk.item, responses.ResponseFunctionWebSearch):
                call_part, return_part = _map_web_search_tool_call(chunk.item, self.provider_name)

@@ -1732,9 +1789,12 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                if maybe_event is not None:  # pragma: no branch
                    yield maybe_event

-               yield self._parts_manager.handle_builtin_tool_return_part(
-                   vendor_part_id=f'{chunk.item.id}-return', part=return_part
-               )
+               yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-return', part=return_part)
+           elif isinstance(chunk.item, responses.response_output_item.ImageGenerationCall):
+               _, return_part, file_part = _map_image_generation_tool_call(chunk.item, self.provider_name)
+               if file_part:  # pragma: no branch
+                   yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-file', part=file_part)
+               yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-return', part=return_part)

        elif isinstance(chunk, responses.ResponseReasoningSummaryPartAddedEvent):
            yield self._parts_manager.handle_thinking_delta(
@@ -1756,8 +1816,8 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                id=chunk.item_id,
            )

-       # TODO(Marcelo): We should support annotations in the future.
-       elif isinstance(chunk, responses.ResponseOutputTextAnnotationAddedEvent):  # pragma: no cover
+       elif isinstance(chunk, responses.ResponseOutputTextAnnotationAddedEvent):
+           # TODO(Marcelo): We should support annotations in the future.
            pass  # there's nothing we need to do here

        elif isinstance(chunk, responses.ResponseTextDeltaEvent):
@@ -1808,6 +1868,28 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
        elif isinstance(chunk, responses.ResponseCodeInterpreterCallInterpretingEvent):
            pass  # there's nothing we need to do here

+       elif isinstance(chunk, responses.ResponseImageGenCallCompletedEvent):  # pragma: no cover
+           pass  # there's nothing we need to do here
+
+       elif isinstance(chunk, responses.ResponseImageGenCallGeneratingEvent):
+           pass  # there's nothing we need to do here
+
+       elif isinstance(chunk, responses.ResponseImageGenCallInProgressEvent):
+           pass  # there's nothing we need to do here
+
+       elif isinstance(chunk, responses.ResponseImageGenCallPartialImageEvent):
+           # Not present on the type, but present on the actual object.
+           # See https://github.com/openai/openai-python/issues/2649
+           output_format = getattr(chunk, 'output_format', 'png')
+           file_part = FilePart(
+               content=BinaryImage(
+                   data=base64.b64decode(chunk.partial_image_b64),
+                   media_type=f'image/{output_format}',
+               ),
+               id=chunk.item_id,
+           )
+           yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item_id}-file', part=file_part)
+
        else:  # pragma: no cover
            warnings.warn(
                f'Handling of this event type is not yet implemented. Please report on our GitHub: {chunk}',
@@ -1897,12 +1979,29 @@ def _split_combined_tool_call_id(combined_id: str) -> tuple[str, str | None]:

def _map_code_interpreter_tool_call(
    item: responses.ResponseCodeInterpreterToolCall, provider_name: str
-) -> tuple[BuiltinToolCallPart, BuiltinToolReturnPart]:
+) -> tuple[BuiltinToolCallPart, BuiltinToolReturnPart, list[FilePart]]:
    result: dict[str, Any] = {
        'status': item.status,
    }
+
+   file_parts: list[FilePart] = []
+   logs: list[str] = []
    if item.outputs:
-       result['outputs'] = [output.model_dump(mode='json') for output in item.outputs]
+       for output in item.outputs:
+           if isinstance(output, responses.response_code_interpreter_tool_call.OutputImage):
+               file_parts.append(
+                   FilePart(
+                       content=BinaryImage.from_data_uri(output.url),
+                       id=item.id,
+                   )
+               )
+           elif isinstance(output, responses.response_code_interpreter_tool_call.OutputLogs):
+               logs.append(output.logs)
+           else:
+               assert_never(output)
+
+   if logs:
+       result['logs'] = logs

    return (
        BuiltinToolCallPart(
@@ -1920,6 +2019,7 @@ def _map_code_interpreter_tool_call(
            content=result,
            provider_name=provider_name,
        ),
+       file_parts,
    )

@@ -1953,3 +2053,51 @@ def _map_web_search_tool_call(
            provider_name=provider_name,
        ),
    )
+
+
+def _map_image_generation_tool_call(
+   item: responses.response_output_item.ImageGenerationCall, provider_name: str
+) -> tuple[BuiltinToolCallPart, BuiltinToolReturnPart, FilePart | None]:
+   result = {
+       'status': item.status,
+   }
+
+   # Not present on the type, but present on the actual object.
+   # See https://github.com/openai/openai-python/issues/2649
+   if background := getattr(item, 'background', None):
+       result['background'] = background
+   if quality := getattr(item, 'quality', None):
+       result['quality'] = quality
+   if size := getattr(item, 'size', None):
+       result['size'] = size
+   if revised_prompt := getattr(item, 'revised_prompt', None):
+       result['revised_prompt'] = revised_prompt
+   output_format = getattr(item, 'output_format', 'png')
+
+   file_part: FilePart | None = None
+   if item.result:
+       file_part = FilePart(
+           content=BinaryImage(
+               data=base64.b64decode(item.result),
+               media_type=f'image/{output_format}',
+           ),
+           id=item.id,
+       )
+
+       # For some reason, the streaming API leaves `status` as `generating` even though generation has completed.
+       result['status'] = 'completed'
+
+   return (
+       BuiltinToolCallPart(
+           tool_name=ImageGenerationTool.kind,
+           tool_call_id=item.id,
+           provider_name=provider_name,
+       ),
+       BuiltinToolReturnPart(
+           tool_name=ImageGenerationTool.kind,
+           tool_call_id=item.id,
+           content=result,
+           provider_name=provider_name,
+       ),
+       file_part,
+   )
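
For reference, a completed image generation call maps to a call part, a return part, and a file part. A rough sketch of exercising the helper above (the `ImageGenerationCall` constructor fields come from openai-python and should be treated as an assumption, as should the placeholder PNG bytes; the helper itself is private to the module):

import base64
from openai.types.responses.response_output_item import ImageGenerationCall
from pydantic_ai.builtin_tools import ImageGenerationTool
from pydantic_ai.models.openai import _map_image_generation_tool_call  # private helper

item = ImageGenerationCall(
    id='ig_123',
    result=base64.b64encode(b'\x89PNG\r\n\x1a\n').decode(),  # placeholder, not a full PNG
    status='completed',
    type='image_generation_call',
)
call_part, return_part, file_part = _map_image_generation_tool_call(item, 'openai')
assert call_part.tool_name == return_part.tool_name == ImageGenerationTool.kind
assert file_part is not None and file_part.content.media_type == 'image/png'
assert return_part.content['status'] == 'completed'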
pydantic_ai/models/test.py CHANGED
@@ -17,6 +17,7 @@ from ..exceptions import UserError
 from ..messages import (
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    FilePart,
     ModelMessage,
     ModelRequest,
     ModelResponse,
@@ -327,6 +328,9 @@ class TestStreamedResponse(StreamedResponse):
            elif isinstance(part, ThinkingPart):  # pragma: no cover
                # NOTE: There's no way to reach this part of the code, since we don't generate ThinkingPart on TestModel.
                assert False, "This should be unreachable — we don't generate ThinkingPart on TestModel."
+           elif isinstance(part, FilePart):  # pragma: no cover
+               # NOTE: There's no way to reach this part of the code, since we don't generate FilePart on TestModel.
+               assert False, "This should be unreachable — we don't generate FilePart on TestModel."
            else:
                assert_never(part)

pydantic_ai/output.py CHANGED
@@ -37,8 +37,11 @@ T_co = TypeVar('T_co', covariant=True)
 OutputDataT = TypeVar('OutputDataT', default=str, covariant=True)
 """Covariant type variable for the output data type of a run."""

-OutputMode = Literal['text', 'tool', 'native', 'prompted', 'tool_or_text']
-"""All output modes."""
+OutputMode = Literal['text', 'tool', 'native', 'prompted', 'tool_or_text', 'image']
+"""All output modes.
+
+`tool_or_text` is deprecated and no longer in use.
+"""
 StructuredOutputMode = Literal['tool', 'native', 'prompted']
 """Output modes that can be used for structured output. Used by ModelProfile.default_structured_output_mode"""

pydantic_ai/profiles/__init__.py CHANGED
@@ -28,6 +28,8 @@ class ModelProfile:
     """Whether the model supports JSON schema output."""
     supports_json_object_output: bool = False
     """Whether the model supports JSON object output."""
+    supports_image_output: bool = False
+    """Whether the model supports image output."""
     default_structured_output_mode: StructuredOutputMode = 'tool'
     """The default structured output mode to use for the model."""
     prompted_output_template: str = dedent(
pydantic_ai/profiles/google.py CHANGED
@@ -10,10 +10,13 @@ from . import ModelProfile

 def google_model_profile(model_name: str) -> ModelProfile | None:
     """Get the model profile for a Google model."""
+    is_image_model = 'image' in model_name
     return ModelProfile(
         json_schema_transformer=GoogleJsonSchemaTransformer,
-        supports_json_schema_output=True,
-        supports_json_object_output=True,
+        supports_image_output=is_image_model,
+        supports_json_schema_output=not is_image_model,
+        supports_json_object_output=not is_image_model,
+        supports_tools=not is_image_model,
     )

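The capability flags are driven purely by a substring check on the model name, so for example (illustrative model names):

from pydantic_ai.profiles.google import google_model_profile

profile = google_model_profile('gemini-2.5-flash-image')
assert profile is not None
assert profile.supports_image_output and not profile.supports_tools

profile = google_model_profile('gemini-2.0-flash')
assert profile is not None
assert not profile.supports_image_output and profile.supports_json_schema_output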
pydantic_ai/profiles/openai.py CHANGED
@@ -26,7 +26,7 @@ class OpenAIModelProfile(ModelProfile):
     """Turn off to don't send sampling settings like `temperature` and `top_p` to models that don't support them, like OpenAI's o-series reasoning models."""

     openai_unsupported_model_settings: Sequence[str] = ()
-    """A list of model settings that are not supported by the model."""
+    """A list of model settings that are not supported by this model."""

     # Some OpenAI-compatible providers (e.g. MoonshotAI) currently do **not** accept
     # `tool_choice="required"`. This flag lets the calling model know whether it's
@@ -84,6 +84,7 @@ def openai_model_profile(model_name: str) -> ModelProfile:
         json_schema_transformer=OpenAIJsonSchemaTransformer,
         supports_json_schema_output=True,
         supports_json_object_output=True,
+        supports_image_output=is_reasoning_model or '4.1' in model_name or '4o' in model_name,
         openai_unsupported_model_settings=openai_unsupported_model_settings,
         openai_system_prompt_role=openai_system_prompt_role,
         openai_chat_supports_web_search=supports_web_search,
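
That is, image output is flagged for reasoning models and for names containing '4.1' or '4o'; a quick check (assuming `is_reasoning_model` covers the o-series as usual):

from pydantic_ai.profiles.openai import openai_model_profile

assert openai_model_profile('gpt-4o').supports_image_output
assert openai_model_profile('gpt-4.1-mini').supports_image_output
assert not openai_model_profile('gpt-3.5-turbo').supports_image_output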